Mirror of https://github.com/KnugiHK/WhatsApp-Chat-Exporter.git
Merge branch 'dev' into feature/export-reactions
@@ -115,7 +115,7 @@ Do an iPhone/iPad Backup with iTunes/Finder first.

If you want to work on an encrypted iOS/iPadOS Backup, you should install iphone_backup_decrypt from [KnugiHK/iphone_backup_decrypt](https://github.com/KnugiHK/iphone_backup_decrypt) before you run the extract_iphone_media.py.
```sh
-pip install git+https://github.com/KnugiHK/iphone_backup_decrypt
+pip install whatsapp-chat-exporter["ios_backup"]
```
> [!NOTE]
> You will need to disable the built-in end-to-end encryption for WhatsApp backups. See [WhatsApp's FAQ](https://faq.whatsapp.com/490592613091019#turn-off-end-to-end-encrypted-backup) for how to do it.
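
Once the extras are installed, a typical encrypted-iOS run could look like the following. This is a sketch: the `wtsexporter` entry point and the `-i`/`-b` flags are assumptions based on the project's usual CLI and are not shown in this diff.

```sh
# Hypothetical invocation; replace the path with your own backup's device ID.
wtsexporter -i -b "~/Library/Application Support/MobileSync/Backup/<device-id>"
```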
@@ -11,14 +11,15 @@ import logging
import importlib.metadata
from Whatsapp_Chat_Exporter import android_crypt, exported_handler, android_handler
from Whatsapp_Chat_Exporter import ios_handler, ios_media_handler
-from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore
-from Whatsapp_Chat_Exporter.utility import APPLE_TIME, CLEAR_LINE, Crypt, check_update
+from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore, Timing
+from Whatsapp_Chat_Exporter.utility import APPLE_TIME, CLEAR_LINE, CURRENT_TZ_OFFSET, Crypt
from Whatsapp_Chat_Exporter.utility import readable_to_bytes, safe_name, bytes_to_readable
-from Whatsapp_Chat_Exporter.utility import import_from_json, incremental_merge, DbType
-from Whatsapp_Chat_Exporter.utility import telegram_json_format
+from Whatsapp_Chat_Exporter.utility import import_from_json, incremental_merge, check_update
+from Whatsapp_Chat_Exporter.utility import telegram_json_format, convert_time_unit, DbType
from argparse import ArgumentParser, SUPPRESS
from datetime import datetime
from getpass import getpass
+from tqdm import tqdm
from sys import exit
from typing import Optional, List, Dict
from Whatsapp_Chat_Exporter.vcards_contacts import ContactsFromVCards
@@ -286,13 +287,17 @@ def setup_argument_parser() -> ArgumentParser:
        help="Specify the chunk size for decrypting iOS backup, which may affect the decryption speed."
    )
    misc_group.add_argument(
-        "--max-bruteforce-worker", dest="max_bruteforce_worker", default=10, type=int,
+        "--max-bruteforce-worker", dest="max_bruteforce_worker", default=4, type=int,
        help="Specify the maximum number of worker for bruteforce decryption."
    )
    misc_group.add_argument(
        "--no-banner", dest="no_banner", default=False, action='store_true',
        help="Do not show the banner"
    )
+    misc_group.add_argument(
+        "--fix-dot-files", dest="fix_dot_files", default=False, action='store_true',
+        help="Fix files with a dot at the end of their name (allowing the outputs to be stored on FAT filesystems)"
+    )

    return parser
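
For illustration, the new flag would be combined with an ordinary export like so. Only `--fix-dot-files` and `--max-bruteforce-worker` are defined in this diff; the `wtsexporter` command name and the `-a` Android switch are assumptions about the surrounding CLI.

```sh
# Hypothetical run: brute-force with 4 workers, normalizing trailing-dot filenames.
wtsexporter -a --max-bruteforce-worker 4 --fix-dot-files
```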
@@ -537,6 +542,7 @@ def process_messages(args, data: ChatCollection) -> None:
            exit(6)

    filter_chat = (args.filter_chat_include, args.filter_chat_exclude)
+    timing = Timing(args.timezone_offset if args.timezone_offset else CURRENT_TZ_OFFSET)

    with sqlite3.connect(msg_db) as db:
        db.row_factory = sqlite3.Row
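
The `Timing` object introduced here is not itself shown in this diff. A minimal sketch consistent with how it is used below — constructed from an hour offset, exposing `format_timestamp` — might look like this (an assumption, not the project's actual implementation):

```python
from datetime import datetime, timedelta, timezone

class Timing:
    """Hypothetical sketch of the Timing helper this commit assumes."""

    def __init__(self, offset_hours):
        # CURRENT_TZ_OFFSET is presumably the local UTC offset in hours.
        self.tz = timezone(timedelta(hours=offset_hours or 0))

    def format_timestamp(self, ts, fmt):
        # Render a Unix timestamp in the configured timezone.
        return datetime.fromtimestamp(ts, tz=self.tz).strftime(fmt)
```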
@@ -548,14 +554,14 @@ def process_messages(args, data: ChatCollection) -> None:
            message_handler = ios_handler

        message_handler.messages(
-            db, data, args.media, args.timezone_offset, args.filter_date,
+            db, data, args.media, timing, args.filter_date,
            filter_chat, args.filter_empty, args.no_reply_ios
        )

        # Process media
        message_handler.media(
            db, data, args.media, args.filter_date,
-            filter_chat, args.filter_empty, args.separate_media
+            filter_chat, args.filter_empty, args.separate_media, args.fix_dot_files
        )

        # Process vcards
@@ -565,17 +571,17 @@ def process_messages(args, data: ChatCollection) -> None:
        )

        # Process calls
-        process_calls(args, db, data, filter_chat)
+        process_calls(args, db, data, filter_chat, timing)


-def process_calls(args, db, data: ChatCollection, filter_chat) -> None:
+def process_calls(args, db, data: ChatCollection, filter_chat, timing) -> None:
    """Process call history if available."""
    if args.android:
-        android_handler.calls(db, data, args.timezone_offset, filter_chat)
+        android_handler.calls(db, data, timing, filter_chat)
    elif args.ios and args.call_db_ios is not None:
        with sqlite3.connect(args.call_db_ios) as cdb:
            cdb.row_factory = sqlite3.Row
-            ios_handler.calls(cdb, data, args.timezone_offset, filter_chat)
+            ios_handler.calls(cdb, data, timing, filter_chat)


def handle_media_directory(args) -> None:
@@ -665,24 +671,27 @@ def export_multiple_json(args, data: Dict) -> None:

    # Export each chat
    total = len(data.keys())
-    for index, jik in enumerate(data.keys()):
-        if data[jik]["name"] is not None:
-            contact = data[jik]["name"].replace('/', '')
-        else:
-            contact = jik.replace('+', '')
+    with tqdm(total=total, desc="Generating JSON files", unit="file", leave=False) as pbar:
+        for jik in data.keys():
+            if data[jik]["name"] is not None:
+                contact = data[jik]["name"].replace('/', '')
+            else:
+                contact = jik.replace('+', '')

-        if args.telegram:
-            messages = telegram_json_format(jik, data[jik], args.timezone_offset)
-        else:
-            messages = {jik: data[jik]}
-        with open(f"{json_path}/{safe_name(contact)}.json", "w") as f:
-            file_content = json.dumps(
-                messages,
-                ensure_ascii=not args.avoid_encoding_json,
-                indent=args.pretty_print_json
-            )
-            f.write(file_content)
-        logger.info(f"Writing JSON file...({index + 1}/{total})\r")
+            if args.telegram:
+                messages = telegram_json_format(jik, data[jik], args.timezone_offset)
+            else:
+                messages = {jik: data[jik]}
+            with open(f"{json_path}/{safe_name(contact)}.json", "w") as f:
+                file_content = json.dumps(
+                    messages,
+                    ensure_ascii=not args.avoid_encoding_json,
+                    indent=args.pretty_print_json
+                )
+                f.write(file_content)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Generated {total} JSON files in {convert_time_unit(total_time)}{CLEAR_LINE}")


def process_exported_chat(args, data: ChatCollection) -> None:
@@ -1,13 +1,12 @@
-import time
import hmac
import io
import logging
-import threading
import zlib
import concurrent.futures
+from tqdm import tqdm
from typing import Tuple, Union
from hashlib import sha256
from sys import exit
+from functools import partial
from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, CRYPT14_OFFSETS, Crypt, DbType

try:
@@ -112,13 +111,36 @@ def _decrypt_database(db_ciphertext: bytes, main_key: bytes, iv: bytes) -> bytes
        zlib.error: If decompression fails.
        ValueError: if the plaintext is not a SQLite database.
    """
+    FOOTER_SIZE = 32
+    if len(db_ciphertext) <= FOOTER_SIZE:
+        raise ValueError("Input data too short to contain a valid GCM tag.")
+
+    actual_ciphertext = db_ciphertext[:-FOOTER_SIZE]
+    tag = db_ciphertext[-FOOTER_SIZE: -FOOTER_SIZE + 16]
+
    cipher = AES.new(main_key, AES.MODE_GCM, iv)
-    db_compressed = cipher.decrypt(db_ciphertext)
-    db = zlib.decompress(db_compressed)
-    if db[0:6].upper() != b"SQLITE":
+    try:
+        db_compressed = cipher.decrypt_and_verify(actual_ciphertext, tag)
+    except ValueError:
+        # This could be key, IV, or tag is wrong, but likely the key is wrong.
+        raise ValueError("Decryption/Authentication failed. Ensure you are using the correct key.")
+
+    if len(db_compressed) < 2 or db_compressed[0] != 0x78:
+        logger.debug(f"Data passes GCM but is not Zlib. Header: {db_compressed[:2].hex()}")
        raise ValueError(
-            "The plaintext is not a SQLite database. Ensure you are using the correct key."
+            "Key is correct, but decrypted data is not a valid compressed stream. "
+            "Is this even a valid WhatsApp database backup?"
        )
+
+    try:
+        db = zlib.decompress(db_compressed)
+    except zlib.error as e:
+        raise zlib.error(f"Decompression failed (the backup file is likely corrupted at source): {e}")
+
+    if not db.startswith(b"SQLite"):
+        raise ValueError(
+            "Data is valid and decompressed, but it is not a SQLite database. "
+            "Is this even a valid WhatsApp database backup?")
    return db
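
A self-contained round trip illustrates the layout this function now assumes: zlib-compressed plaintext, AES-GCM encrypted, followed by a 32-byte footer whose first 16 bytes are the tag. This is a sketch using PyCryptodome; the key and IV are random stand-ins, not real WhatsApp material.

```python
import zlib
from Crypto.Cipher import AES
from Crypto.Random import get_random_bytes

key, iv = get_random_bytes(32), get_random_bytes(16)
plaintext = b"SQLite format 3\x00" + b"\x00" * 84  # fake database header

cipher = AES.new(key, AES.MODE_GCM, iv)
body, tag = cipher.encrypt_and_digest(zlib.compress(plaintext))
blob = body + tag + b"\x00" * 16  # 32-byte footer, tag in the first half

FOOTER_SIZE = 32
cipher = AES.new(key, AES.MODE_GCM, iv)
recovered = zlib.decompress(
    cipher.decrypt_and_verify(blob[:-FOOTER_SIZE], blob[-FOOTER_SIZE:-FOOTER_SIZE + 16])
)
assert recovered.startswith(b"SQLite")
```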
@@ -142,82 +164,69 @@ def _decrypt_crypt14(database: bytes, main_key: bytes, max_worker: int = 10) ->

    # Attempt known offsets first
    for offsets in CRYPT14_OFFSETS:
-        iv = database[offsets["iv"]:offsets["iv"] + 16]
-        db_ciphertext = database[offsets["db"]:]
+        iv = offsets["iv"]
+        db = offsets["db"]
        try:
-            decrypted_db = _decrypt_database(db_ciphertext, main_key, iv)
+            decrypted_db = _attempt_decrypt_task((iv, iv + 16, db), database, main_key)
        except (zlib.error, ValueError):
-            pass  # Try next offset
+            continue
        else:
            logger.debug(
-                f"Decryption successful with known offsets: IV {offsets['iv']}, DB {offsets['db']}{CLEAR_LINE}"
+                f"Decryption successful with known offsets: IV {iv}, DB {db}{CLEAR_LINE}"
            )
            return decrypted_db  # Successful decryption

-    def animate_message(stop_event):
-        base_msg = "Common offsets failed. Initiating brute-force with multithreading"
-        dots = ["", ".", "..", "..."]
-        i = 0
-        while not stop_event.is_set():
-            logger.info(f"{base_msg}{dots[i % len(dots)]}\x1b[K\r")
-            time.sleep(0.3)
-            i += 1
-        logger.info(f"Common offsets failed but brute-forcing the offset works!{CLEAR_LINE}")
-
-    stop_event = threading.Event()
-    anim_thread = threading.Thread(target=animate_message, args=(stop_event,))
-    anim_thread.start()
-
-    # Convert brute force generator into a list for parallel processing
-    offset_combinations = list(brute_force_offset())
-
-    def attempt_decrypt(offset_tuple):
-        """Attempt decryption with the given offsets."""
-        start_iv, end_iv, start_db = offset_tuple
-        iv = database[start_iv:end_iv]
-        db_ciphertext = database[start_db:]
-        logger.debug(""f"Trying offsets: IV {start_iv}-{end_iv}, DB {start_db}{CLEAR_LINE}")
-
-        try:
-            db = _decrypt_database(db_ciphertext, main_key, iv)
-        except (zlib.error, ValueError):
-            return None  # Decryption failed, move to next
-        else:
-            stop_event.set()
-            anim_thread.join()
-            logger.info(
-                f"The offsets of your IV and database are {start_iv} and "
-                f"{start_db}, respectively. To include your offsets in the "
-                "program, please report it by creating an issue on GitHub: "
-                "https://github.com/KnugiHK/Whatsapp-Chat-Exporter/discussions/47"
-                f"\nShutting down other threads...{CLEAR_LINE}"
-            )
-            return db
-
-    with concurrent.futures.ThreadPoolExecutor(max_worker) as executor:
-        future_to_offset = {executor.submit(attempt_decrypt, offset)
-                            : offset for offset in offset_combinations}
-
-        try:
-            for future in concurrent.futures.as_completed(future_to_offset):
-                result = future.result()
-                if result is not None:
-                    # Shutdown remaining threads
-                    executor.shutdown(wait=False, cancel_futures=True)
-                    return result
+    logger.info(f"Common offsets failed. Will attempt to brute-force{CLEAR_LINE}")
+    offset_max = 200
+    workers = max_worker
+    check_offset = partial(_attempt_decrypt_task, database=database, main_key=main_key)
+    all_offsets = list(brute_force_offset(offset_max, offset_max))
+    executor = concurrent.futures.ProcessPoolExecutor(max_workers=workers)
+    try:
+        with tqdm(total=len(all_offsets), desc="Brute-forcing offsets", unit="trial", leave=False) as pbar:
+            results = executor.map(check_offset, all_offsets, chunksize=8)
+            found = False
+            for offset_info, result in zip(all_offsets, results):
+                pbar.update(1)
+                if result:
+                    start_iv, _, start_db = offset_info
+                    # Clean shutdown on success
+                    executor.shutdown(wait=False, cancel_futures=True)
+                    found = True
+                    break
+            if found:
+                logger.info(
+                    f"The offsets of your IV and database are {start_iv} and {start_db}, respectively.{CLEAR_LINE}"
+                )
+                logger.info(
+                    f"To include your offsets in the exporter, please report it in the discussion thread on GitHub:{CLEAR_LINE}"
+                )
+                logger.info(f"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/discussions/47{CLEAR_LINE}")
+                return result

-        except KeyboardInterrupt:
-            stop_event.set()
-            anim_thread.join()
-            logger.info(f"Brute force interrupted by user (Ctrl+C). Shutting down gracefully...{CLEAR_LINE}")
-            executor.shutdown(wait=False, cancel_futures=True)
-            exit(1)
-        finally:
-            stop_event.set()
-            anim_thread.join()
+    except KeyboardInterrupt:
+        executor.shutdown(wait=False, cancel_futures=True)
+        print("\n")
+        raise KeyboardInterrupt(
+            f"Brute force interrupted by user (Ctrl+C). Shutting down gracefully...{CLEAR_LINE}"
+        )
+    finally:
+        executor.shutdown(wait=False)

    raise OffsetNotFoundError("Could not find the correct offsets for decryption.")


+def _attempt_decrypt_task(offset_tuple, database, main_key):
+    """Attempt decryption with the given offsets."""
+    start_iv, end_iv, start_db = offset_tuple
+    iv = database[start_iv:end_iv]
+    db_ciphertext = database[start_db:]
+
+    try:
+        return _decrypt_database(db_ciphertext, main_key, iv)
+    except (zlib.error, ValueError):
+        return None
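
`brute_force_offset` is referenced but not shown in this diff. A plausible sketch, assuming it simply enumerates `(iv_start, iv_end, db_start)` candidates up to the caps passed above:

```python
def brute_force_offset(max_iv: int = 200, max_db: int = 200):
    """Hypothetical sketch: yield candidate (iv_start, iv_end, db_start) triples."""
    for iv_start in range(0, max_iv):
        # The database payload must start after the 16-byte IV candidate.
        for db_start in range(iv_start + 16, max_db):
            yield iv_start, iv_start + 16, db_start
```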
def _decrypt_crypt12(database: bytes, main_key: bytes) -> bytes:
    """Decrypt a crypt12 database.
@@ -4,13 +4,14 @@ import logging
import sqlite3
import os
import shutil
+from tqdm import tqdm
from pathlib import Path
from mimetypes import MimeTypes
from markupsafe import escape as htmle
from base64 import b64decode, b64encode
from datetime import datetime
-from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
-from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, CURRENT_TZ_OFFSET, MAX_SIZE, ROW_SIZE, JidType, Device
+from Whatsapp_Chat_Exporter.data_model import ChatStore, Message, Timing
+from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, MAX_SIZE, ROW_SIZE, JidType, Device
from Whatsapp_Chat_Exporter.utility import rendering, get_file_name, setup_template, get_cond_for_empty
from Whatsapp_Chat_Exporter.utility import get_status_location, convert_time_unit, determine_metadata
from Whatsapp_Chat_Exporter.utility import get_chat_condition, safe_name, bytes_to_readable
@@ -47,12 +48,15 @@ def contacts(db, data, enrich_from_vcards):
    logger.info(f"Processed {total_row_number} contacts\n")

    c.execute("SELECT jid, COALESCE(display_name, wa_name) as display_name, status FROM wa_contacts;")
-    row = c.fetchone()
-    while row is not None:
-        current_chat = data.add_chat(row["jid"], ChatStore(Device.ANDROID, row["display_name"]))
-        if row["status"] is not None:
-            current_chat.status = row["status"]
-        row = c.fetchone()
+    with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar:
+        while (row := _fetch_row_safely(c)) is not None:
+            current_chat = data.add_chat(row["jid"], ChatStore(Device.ANDROID, row["display_name"]))
+            if row["status"] is not None:
+                current_chat.status = row["status"]
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} contacts in {convert_time_unit(total_time)}{CLEAR_LINE}")

    return True
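
`convert_time_unit` is also not part of this diff. Given that it is fed `pbar.format_dict['elapsed']` (a float of seconds) everywhere in this commit, a minimal sketch would be:

```python
def convert_time_unit(seconds: float) -> str:
    """Hypothetical sketch: render elapsed seconds as a short duration string."""
    minutes, secs = divmod(int(seconds), 60)
    hours, minutes = divmod(minutes, 60)
    if hours:
        return f"{hours}h {minutes}m {secs}s"
    return f"{minutes}m {secs}s" if minutes else f"{secs}s"
```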
@@ -72,7 +76,6 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
    """
    c = db.cursor()
    total_row_number = _get_message_count(c, filter_empty, filter_date, filter_chat)
-    logger.info(f"Processing messages...(0/{total_row_number})\r")

    try:
        content_cursor = _get_messages_cursor_legacy(c, filter_empty, filter_date, filter_chat)
@@ -84,23 +87,12 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
    except Exception as e:
        raise e

-    i = 0
-    # Fetch the first row safely
-    content = _fetch_row_safely(content_cursor)
-
-    while content is not None:
-        _process_single_message(data, content, table_message, timezone_offset)
-
-        i += 1
-        if i % 1000 == 0:
-            logger.info(f"Processing messages...({i}/{total_row_number})\r")
-
-        # Fetch the next row safely
-        content = _fetch_row_safely(content_cursor)
-
-    _get_reactions(db, data)
-    logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}")
-
+    with tqdm(total=total_row_number, desc="Processing messages", unit="msg", leave=False) as pbar:
+        while (content := _fetch_row_safely(content_cursor)) is not None:
+            _process_single_message(data, content, table_message, timezone_offset)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} messages in {convert_time_unit(total_time)}{CLEAR_LINE}")

# Helper functions for message processing
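
One helper this section leans on, `_fetch_row_safely`, is not shown in the diff. A plausible shape — assuming its job is to keep iterating when a single row fails to decode rather than aborting the whole export — might be:

```python
import sqlite3

def _fetch_row_safely(cursor):
    """Hypothetical sketch: fetch the next row, skipping rows that raise."""
    while True:
        try:
            return cursor.fetchone()
        except sqlite3.OperationalError:
            # Skip a row the driver cannot decode and try the next one;
            # the real helper may log or handle other error types.
            continue
```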
@@ -126,14 +118,16 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
                {include_filter}
                {exclude_filter}""")
    except sqlite3.OperationalError:
-        empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
+        empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
        date_filter = f'AND timestamp {filter_date}' if filter_date is not None else ''
        include_filter = get_chat_condition(
-            filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
+            filter_chat[0], True, ["key_remote_jid", "group_sender_jid"], "jid", "android")
        exclude_filter = get_chat_condition(
-            filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
+            filter_chat[1], False, ["key_remote_jid", "group_sender_jid"], "jid", "android")

-        cursor.execute(f"""SELECT count()
+        cursor.execute(f"""SELECT count(),
+                COALESCE(lid_global.raw_string, jid.raw_string) as key_remote_jid,
+                COALESCE(lid_group.raw_string, jid_group.raw_string) as group_sender_jid
            FROM message
                LEFT JOIN chat
                    ON chat._id = message.chat_row_id
@@ -141,6 +135,14 @@ def _get_message_count(cursor, filter_empty, filter_date, filter_chat):
                    ON jid._id = chat.jid_row_id
                LEFT JOIN jid jid_group
                    ON jid_group._id = message.sender_jid_row_id
+                LEFT JOIN jid_map as jid_map_global
+                    ON chat.jid_row_id = jid_map_global.lid_row_id
+                LEFT JOIN jid lid_global
+                    ON jid_map_global.jid_row_id = lid_global._id
+                LEFT JOIN jid_map as jid_map_group
+                    ON message.sender_jid_row_id = jid_map_group.lid_row_id
+                LEFT JOIN jid lid_group
+                    ON jid_map_group.jid_row_id = lid_group._id
            WHERE 1=1
                {empty_filter}
                {date_filter}
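
An aside on the `jid_map` joins added throughout this commit: they all follow one pattern — resolve a chat's LID row (apparently the `@lid` identifiers newer WhatsApp databases store) back to a phone-number JID when a mapping exists, otherwise fall back to the raw JID. Stripped of each surrounding query, the recurring shape is:

```sql
-- Sketch of the LID-to-JID resolution pattern repeated in this commit.
SELECT COALESCE(lid_global.raw_string, jid.raw_string) AS key_remote_jid
FROM chat
LEFT JOIN jid ON jid._id = chat.jid_row_id
LEFT JOIN jid_map AS jid_map_global
    ON chat.jid_row_id = jid_map_global.lid_row_id
LEFT JOIN jid lid_global
    ON jid_map_global.jid_row_id = lid_global._id;
```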
@@ -219,11 +221,11 @@ def _get_messages_cursor_new(cursor, filter_empty, filter_date, filter_chat):
    empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
    date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
    include_filter = get_chat_condition(
-        filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
+        filter_chat[0], True, ["key_remote_jid", "lid_group.raw_string"], "jid_global", "android")
    exclude_filter = get_chat_condition(
-        filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")
+        filter_chat[1], False, ["key_remote_jid", "lid_group.raw_string"], "jid_global", "android")

-    cursor.execute(f"""SELECT jid_global.raw_string as key_remote_jid,
+    cursor.execute(f"""SELECT COALESCE(lid_global.raw_string, jid_global.raw_string) as key_remote_jid,
            message._id,
            message.from_me as key_from_me,
            message.timestamp,
@@ -238,7 +240,7 @@ def _get_messages_cursor_new(cursor, filter_empty, filter_date, filter_chat):
            message.key_id,
            message_quoted.text_data as quoted_data,
            message.message_type as media_wa_type,
-            jid_group.raw_string as group_sender_jid,
+            COALESCE(lid_group.raw_string, jid_group.raw_string) as group_sender_jid,
            chat.subject as chat_subject,
            missed_call_logs.video_call,
            message.sender_jid_row_id,
@@ -248,7 +250,8 @@ def _get_messages_cursor_new(cursor, filter_empty, filter_date, filter_chat):
            jid_new.raw_string as new_jid,
            jid_global.type as jid_type,
            COALESCE(receipt_user.receipt_timestamp, message.received_timestamp) as received_timestamp,
-            COALESCE(receipt_user.read_timestamp, receipt_user.played_timestamp) as read_timestamp
+            COALESCE(receipt_user.read_timestamp, receipt_user.played_timestamp) as read_timestamp,
+            message_media.raw_transcription_text as transcription_text
        FROM message
            LEFT JOIN message_quoted
                ON message_quoted.message_row_id = message._id
@@ -280,6 +283,14 @@ def _get_messages_cursor_new(cursor, filter_empty, filter_date, filter_chat):
                ON jid_new._id = message_system_number_change.new_jid_row_id
            LEFT JOIN receipt_user
                ON receipt_user.message_row_id = message._id
+            LEFT JOIN jid_map as jid_map_global
+                ON chat.jid_row_id = jid_map_global.lid_row_id
+            LEFT JOIN jid lid_global
+                ON jid_map_global.jid_row_id = lid_global._id
+            LEFT JOIN jid_map as jid_map_group
+                ON message.sender_jid_row_id = jid_map_group.lid_row_id
+            LEFT JOIN jid lid_group
+                ON jid_map_group.jid_row_id = lid_group._id
        WHERE key_remote_jid <> '-1'
            {empty_filter}
            {date_filter}
@@ -321,7 +332,7 @@ def _process_single_message(data, content, table_message, timezone_offset):
        timestamp=content["timestamp"],
        time=content["timestamp"],
        key_id=content["key_id"],
-        timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET,
+        timezone_offset=timezone_offset,
        message_type=content["media_wa_type"],
        received_timestamp=content["received_timestamp"],
        read_timestamp=content["read_timestamp"]
@@ -353,9 +364,12 @@ def _process_single_message(data, content, table_message, timezone_offset):
    if not table_message and content["media_caption"] is not None:
        # Old schema
        message.caption = content["media_caption"]
-    elif table_message and content["media_wa_type"] == 1 and content["data"] is not None:
+    elif table_message:
        # New schema
-        message.caption = content["data"]
+        if content["media_wa_type"] == 1 and content["data"] is not None:
+            message.caption = content["data"]
+        elif content["media_wa_type"] == 2 and content["transcription_text"] is not None:
+            message.caption = f'"{content["transcription_text"]}"'
+        else:
+            message.caption = None
@@ -547,7 +561,7 @@ def _get_reactions(db, data):
    logger.info(f"Processed reactions{CLEAR_LINE}")


-def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=True):
+def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=True, fix_dot_files=False):
    """
    Process WhatsApp media files from the database.

@@ -562,8 +576,6 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=True):
    """
    c = db.cursor()
    total_row_number = _get_media_count(c, filter_empty, filter_date, filter_chat)
-    logger.info(f"Processing media...(0/{total_row_number})\r")
-
    try:
        content_cursor = _get_media_cursor_legacy(c, filter_empty, filter_date, filter_chat)
    except sqlite3.OperationalError:
@@ -575,18 +587,12 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=True):
    # Ensure thumbnails directory exists
    Path(f"{media_folder}/thumbnails").mkdir(parents=True, exist_ok=True)

-    i = 0
-    while content is not None:
-        _process_single_media(data, content, media_folder, mime, separate_media)
-
-        i += 1
-        if i % 100 == 0:
-            logger.info(f"Processing media...({i}/{total_row_number})\r")
-
-        content = content_cursor.fetchone()
-
-    logger.info(f"Processed {total_row_number} media{CLEAR_LINE}")
-
+    with tqdm(total=total_row_number, desc="Processing media", unit="media", leave=False) as pbar:
+        while (content := _fetch_row_safely(content_cursor)) is not None:
+            _process_single_media(data, content, media_folder, mime, separate_media, fix_dot_files)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} media in {convert_time_unit(total_time)}{CLEAR_LINE}")

# Helper functions for media processing

@@ -617,11 +623,13 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
    empty_filter = get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")
    date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
    include_filter = get_chat_condition(
-        filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
+        filter_chat[0], True, ["key_remote_jid", "group_sender_jid"], "jid", "android")
    exclude_filter = get_chat_condition(
-        filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")
+        filter_chat[1], False, ["key_remote_jid", "group_sender_jid"], "jid", "android")

-    cursor.execute(f"""SELECT count()
+    cursor.execute(f"""SELECT count(),
+            COALESCE(lid_global.raw_string, jid.raw_string) as key_remote_jid,
+            COALESCE(lid_group.raw_string, jid_group.raw_string) as group_sender_jid
        FROM message_media
            INNER JOIN message
                ON message_media.message_row_id = message._id
@@ -631,6 +639,14 @@ def _get_media_count(cursor, filter_empty, filter_date, filter_chat):
                ON jid._id = chat.jid_row_id
            LEFT JOIN jid jid_group
                ON jid_group._id = message.sender_jid_row_id
+            LEFT JOIN jid_map as jid_map_global
+                ON chat.jid_row_id = jid_map_global.lid_row_id
+            LEFT JOIN jid lid_global
+                ON jid_map_global.jid_row_id = lid_global._id
+            LEFT JOIN jid_map as jid_map_group
+                ON message.sender_jid_row_id = jid_map_group.lid_row_id
+            LEFT JOIN jid lid_group
+                ON jid_map_group.jid_row_id = lid_group._id
        WHERE 1=1
            {empty_filter}
            {date_filter}
@@ -679,18 +695,19 @@ def _get_media_cursor_new(cursor, filter_empty, filter_date, filter_chat):
    empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")
    date_filter = f'AND message.timestamp {filter_date}' if filter_date is not None else ''
    include_filter = get_chat_condition(
-        filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
+        filter_chat[0], True, ["key_remote_jid", "group_sender_jid"], "jid", "android")
    exclude_filter = get_chat_condition(
-        filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")
+        filter_chat[1], False, ["key_remote_jid", "group_sender_jid"], "jid", "android")

-    cursor.execute(f"""SELECT jid.raw_string as key_remote_jid,
+    cursor.execute(f"""SELECT COALESCE(lid_global.raw_string, jid.raw_string) as key_remote_jid,
            message_row_id,
            file_path,
            message_url,
            mime_type,
            media_key,
            file_hash,
-            thumbnail
+            thumbnail,
+            COALESCE(lid_group.raw_string, jid_group.raw_string) as group_sender_jid
        FROM message_media
            INNER JOIN message
                ON message_media.message_row_id = message._id
@@ -702,6 +719,14 @@ def _get_media_cursor_new(cursor, filter_empty, filter_date, filter_chat):
                ON message_media.file_hash = media_hash_thumbnail.media_hash
            LEFT JOIN jid jid_group
                ON jid_group._id = message.sender_jid_row_id
+            LEFT JOIN jid_map as jid_map_global
+                ON chat.jid_row_id = jid_map_global.lid_row_id
+            LEFT JOIN jid lid_global
+                ON jid_map_global.jid_row_id = lid_global._id
+            LEFT JOIN jid_map as jid_map_group
+                ON message.sender_jid_row_id = jid_map_group.lid_row_id
+            LEFT JOIN jid lid_group
+                ON jid_map_group.jid_row_id = lid_group._id
        WHERE jid.type <> 7
            {empty_filter}
            {date_filter}
@@ -711,7 +736,7 @@ def _get_media_cursor_new(cursor, filter_empty, filter_date, filter_chat):
    return cursor


-def _process_single_media(data, content, media_folder, mime, separate_media):
+def _process_single_media(data, content, media_folder, mime, separate_media, fix_dot_files=False):
    """Process a single media file."""
    file_path = f"{media_folder}/{content['file_path']}"
    current_chat = data.get_chat(content["key_remote_jid"])
@@ -719,8 +744,6 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
    message.media = True

    if os.path.isfile(file_path):
-        message.data = file_path
-
        # Set mime type
        if content["mime_type"] is None:
            guess = mime.guess_type(file_path)[0]
@@ -730,6 +753,16 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
                message.mime = "application/octet-stream"
        else:
            message.mime = content["mime_type"]

+        if fix_dot_files and file_path.endswith("."):
+            extension = mime.guess_extension(message.mime)
+            if message.mime == "application/octet-stream" or not extension:
+                new_file_path = file_path[:-1]
+            else:
+                extension = mime.guess_extension(message.mime)
+                new_file_path = file_path[:-1] + extension
+            os.rename(file_path, new_file_path)
+            file_path = new_file_path
+
        # Copy media to separate folder if needed
        if separate_media:
@@ -741,6 +774,8 @@ def _process_single_media(data, content, media_folder, mime, separate_media):
            new_path = os.path.join(new_folder, current_filename)
            shutil.copy2(file_path, new_path)
            message.data = new_path
+        else:
+            message.data = file_path
    else:
        message.data = "The media is missing"
        message.mime = "media"
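
A worked example of the new dot-file fix introduced above, assuming `mimetypes` can guess an extension for the MIME type (the exact extension chosen can vary by platform):

```python
from mimetypes import MimeTypes

mime = MimeTypes()
file_path = "WhatsApp/Media/IMG-20240101-WA0001."  # trailing dot breaks FAT copies
extension = mime.guess_extension("image/jpeg")     # e.g. ".jpg" on most platforms
new_file_path = file_path[:-1] + extension if extension else file_path[:-1]
print(new_file_path)  # WhatsApp/Media/IMG-20240101-WA0001.jpg (platform-dependent)
```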
@@ -764,45 +799,56 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
        rows = _execute_vcard_query_legacy(c, filter_date, filter_chat, filter_empty)

    total_row_number = len(rows)
-    logger.info(f"Processing vCards...(0/{total_row_number})\r")

    # Create vCards directory if it doesn't exist
    path = os.path.join(media_folder, "vCards")
    Path(path).mkdir(parents=True, exist_ok=True)

-    for index, row in enumerate(rows):
-        _process_vcard_row(row, path, data)
-        logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r")
-    logger.info(f"Processed {total_row_number} vCards{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing vCards", unit="vcard", leave=False) as pbar:
+        for row in rows:
+            _process_vcard_row(row, path, data)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} vCards in {convert_time_unit(total_time)}{CLEAR_LINE}")


def _execute_vcard_query_modern(c, filter_date, filter_chat, filter_empty):
    """Execute vCard query for modern WhatsApp database schema."""

    # Build the filter conditions
-    chat_filter_include = get_chat_condition(
-        filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
-    chat_filter_exclude = get_chat_condition(
-        filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")
    date_filter = f'AND messages.timestamp {filter_date}' if filter_date is not None else ''
    empty_filter = get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")
+    include_filter = get_chat_condition(
+        filter_chat[0], True, ["key_remote_jid", "group_sender_jid"], "jid", "android")
+    exclude_filter = get_chat_condition(
+        filter_chat[1], False, ["key_remote_jid", "group_sender_jid"], "jid", "android")

    query = f"""SELECT message_row_id,
-            messages.key_remote_jid,
-            vcard,
-            messages.media_name
-        FROM messages_vcards
-            INNER JOIN messages
-                ON messages_vcards.message_row_id = messages._id
-            INNER JOIN jid
-                ON messages.key_remote_jid = jid.raw_string
-            LEFT JOIN chat
-                ON chat.jid_row_id = jid._id
+            COALESCE(lid_global.raw_string, jid.raw_string) as key_remote_jid,
+            vcard,
+            messages.media_name,
+            COALESCE(lid_group.raw_string, jid_group.raw_string) as group_sender_jid
+        FROM messages_vcards
+            INNER JOIN messages
+                ON messages_vcards.message_row_id = messages._id
+            INNER JOIN jid
+                ON messages.key_remote_jid = jid.raw_string
+            LEFT JOIN chat
+                ON chat.jid_row_id = jid._id
+            LEFT JOIN jid jid_group
+                ON jid_group._id = message.sender_jid_row_id
+            LEFT JOIN jid_map as jid_map_global
+                ON chat.jid_row_id = jid_map_global.lid_row_id
+            LEFT JOIN jid lid_global
+                ON jid_map_global.jid_row_id = lid_global._id
+            LEFT JOIN jid_map as jid_map_group
+                ON message.sender_jid_row_id = jid_map_group.lid_row_id
+            LEFT JOIN jid lid_group
+                ON jid_map_group.jid_row_id = lid_group._id
        WHERE 1=1
            {empty_filter}
            {date_filter}
-            {chat_filter_include}
-            {chat_filter_exclude}
+            {include_filter}
+            {exclude_filter}
        ORDER BY messages.key_remote_jid ASC;"""
    c.execute(query)
    return c.fetchall()
@@ -879,32 +925,37 @@ def calls(db, data, timezone_offset, filter_chat):
    chat = ChatStore(Device.ANDROID, "WhatsApp Calls")

    # Process each call
-    content = calls_data.fetchone()
-    while content is not None:
-        _process_call_record(content, chat, data, timezone_offset)
-        content = calls_data.fetchone()
+    with tqdm(total=total_row_number, desc="Processing calls", unit="call", leave=False) as pbar:
+        while (content := _fetch_row_safely(calls_data)) is not None:
+            _process_call_record(content, chat, data, timezone_offset)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']

    # Add the calls chat to the data
    data.add_chat("000000000000000", chat)
-    logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}")
-
+    logger.info(f"Processed {total_row_number} calls in {convert_time_unit(total_time)}{CLEAR_LINE}")


def _get_calls_count(c, filter_chat):
    """Get the count of call records that match the filter."""

    # Build the filter conditions
-    chat_filter_include = get_chat_condition(filter_chat[0], True, ["jid.raw_string"])
-    chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["jid.raw_string"])
+    include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid"])
+    exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid"])

-    query = f"""SELECT count()
+    query = f"""SELECT count(),
+            COALESCE(lid_global.raw_string, jid.raw_string) as key_remote_jid
        FROM call_log
            INNER JOIN jid
                ON call_log.jid_row_id = jid._id
            LEFT JOIN chat
                ON call_log.jid_row_id = chat.jid_row_id
+            LEFT JOIN jid_map as jid_map_global
+                ON chat.jid_row_id = jid_map_global.lid_row_id
+            LEFT JOIN jid lid_global
+                ON jid_map_global.jid_row_id = lid_global._id
        WHERE 1=1
-            {chat_filter_include}
-            {chat_filter_exclude}"""
+            {include_filter}
+            {exclude_filter}"""
    c.execute(query)
    return c.fetchone()[0]
@@ -913,11 +964,11 @@ def _fetch_calls_data(c, filter_chat):
    """Fetch call data from the database."""

    # Build the filter conditions
-    chat_filter_include = get_chat_condition(filter_chat[0], True, ["jid.raw_string"])
-    chat_filter_exclude = get_chat_condition(filter_chat[1], False, ["jid.raw_string"])
+    include_filter = get_chat_condition(filter_chat[0], True, ["key_remote_jid"])
+    exclude_filter = get_chat_condition(filter_chat[1], False, ["key_remote_jid"])

    query = f"""SELECT call_log._id,
-            jid.raw_string,
+            COALESCE(lid_global.raw_string, jid.raw_string) as key_remote_jid,
            from_me,
            call_id,
            timestamp,
@@ -931,9 +982,13 @@ def _fetch_calls_data(c, filter_chat):
                ON call_log.jid_row_id = jid._id
            LEFT JOIN chat
                ON call_log.jid_row_id = chat.jid_row_id
+            LEFT JOIN jid_map as jid_map_global
+                ON chat.jid_row_id = jid_map_global.lid_row_id
+            LEFT JOIN jid lid_global
+                ON jid_map_global.jid_row_id = lid_global._id
        WHERE 1=1
-            {chat_filter_include}
-            {chat_filter_exclude}"""
+            {include_filter}
+            {exclude_filter}"""
    c.execute(query)
    return c
@@ -945,13 +1000,13 @@ def _process_call_record(content, chat, data, timezone_offset):
        timestamp=content["timestamp"],
        time=content["timestamp"],
        key_id=content["call_id"],
-        timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET,
+        timezone_offset=timezone_offset,
        received_timestamp=None,  # TODO: Add timestamp
        read_timestamp=None  # TODO: Add timestamp
    )

    # Get caller/callee name
-    _jid = content["raw_string"]
+    _jid = content["key_remote_jid"]
    name = data.get_chat(_jid).name if _jid in data else content["chat_subject"] or None
    if _jid is not None and "@" in _jid:
        fallback = _jid.split('@')[0]
@@ -996,6 +1051,7 @@ def _construct_call_description(content, call):
    return description


+# TODO: Marked for enhancement on multi-threaded processing
def create_html(
    data,
    output_folder,
@@ -1011,7 +1067,6 @@ def create_html(
    template = setup_template(template, no_avatar, experimental)

    total_row_number = len(data)
-    logger.info(f"Generating chats...(0/{total_row_number})\r")

    # Create output directory if it doesn't exist
    if not os.path.isdir(output_folder):
@@ -1019,43 +1074,42 @@ def create_html(

    w3css = get_status_location(output_folder, offline_static)

-    for current, contact in enumerate(data):
-        current_chat = data.get_chat(contact)
-        if len(current_chat) == 0:
-            # Skip empty chats
-            continue
+    with tqdm(total=total_row_number, desc="Generating HTML", unit="file", leave=False) as pbar:
+        for contact in data:
+            current_chat = data.get_chat(contact)
+            if len(current_chat) == 0:
+                # Skip empty chats
+                continue

-        safe_file_name, name = get_file_name(contact, current_chat)
+            safe_file_name, name = get_file_name(contact, current_chat)

-        if maximum_size is not None:
-            _generate_paginated_chat(
-                current_chat,
-                safe_file_name,
-                name,
-                contact,
-                output_folder,
-                template,
-                w3css,
-                maximum_size,
-                headline
-            )
-        else:
-            _generate_single_chat(
-                current_chat,
-                safe_file_name,
-                name,
-                contact,
-                output_folder,
-                template,
-                w3css,
-                headline
-            )
-
-        if current % 10 == 0:
-            logger.info(f"Generating chats...({current}/{total_row_number})\r")
-
-    logger.info(f"Generated {total_row_number} chats{CLEAR_LINE}")
+            if maximum_size is not None:
+                _generate_paginated_chat(
+                    current_chat,
+                    safe_file_name,
+                    name,
+                    contact,
+                    output_folder,
+                    template,
+                    w3css,
+                    maximum_size,
+                    headline
+                )
+            else:
+                _generate_single_chat(
+                    current_chat,
+                    safe_file_name,
+                    name,
+                    contact,
+                    output_folder,
+                    template,
+                    w3css,
+                    headline
+                )
+
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Generated {total_row_number} chats in {convert_time_unit(total_time)}{CLEAR_LINE}")


def _generate_single_chat(current_chat, safe_file_name, name, contact, output_folder, template, w3css, headline):
    """Generate a single HTML file for a chat."""
@@ -279,7 +279,7 @@ class Message:
        key_id: Union[int, str],
        received_timestamp: int = None,
        read_timestamp: int = None,
-        timezone_offset: int = 0,
+        timezone_offset: Optional[Timing] = Timing(0),
        message_type: Optional[int] = None
    ) -> None:
        """
@@ -300,10 +300,9 @@ class Message:
        """
        self.from_me = bool(from_me)
        self.timestamp = timestamp / 1000 if timestamp > 9999999999 else timestamp
-        timing = Timing(timezone_offset)

        if isinstance(time, (int, float)):
-            self.time = timing.format_timestamp(self.timestamp, "%H:%M")
+            self.time = timezone_offset.format_timestamp(self.timestamp, "%H:%M")
        elif isinstance(time, str):
            self.time = time
        else:
@@ -318,14 +317,14 @@ class Message:
        self.mime = None
        self.message_type = message_type
        if isinstance(received_timestamp, (int, float)):
-            self.received_timestamp = timing.format_timestamp(
+            self.received_timestamp = timezone_offset.format_timestamp(
                received_timestamp, "%Y/%m/%d %H:%M")
        elif isinstance(received_timestamp, str):
            self.received_timestamp = received_timestamp
        else:
            self.received_timestamp = None
        if isinstance(read_timestamp, (int, float)):
-            self.read_timestamp = timing.format_timestamp(
+            self.read_timestamp = timezone_offset.format_timestamp(
                read_timestamp, "%Y/%m/%d %H:%M")
        elif isinstance(read_timestamp, str):
            self.read_timestamp = read_timestamp
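
With this change, callers construct one `Timing` and hand it to every `Message` instead of passing raw offsets around. For example (all values are illustrative only):

```python
timing = Timing(2)  # UTC+2, matching the Timing(...) call in process_messages

message = Message(
    from_me=1,
    timestamp=1700000000,
    time=1700000000,
    key_id="3EB0ABCDEF",     # illustrative key id
    timezone_offset=timing,  # a Timing object now, not an int
    message_type=0,
)
```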
@@ -4,8 +4,9 @@ import os
import logging
from datetime import datetime
from mimetypes import MimeTypes
+from tqdm import tqdm
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
-from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, Device
+from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, Device, convert_time_unit


logger = logging.getLogger(__name__)
@@ -34,17 +35,16 @@ def messages(path, data, assume_first_as_me=False):

    # Second pass: process the messages
    with open(path, "r", encoding="utf8") as file:
-        for index, line in enumerate(file):
-            you, user_identification_done = process_line(
-                line, index, chat, path, you,
-                assume_first_as_me, user_identification_done
-            )
+        with tqdm(total=total_row_number, desc="Processing messages & media", unit="msg&media", leave=False) as pbar:
+            for index, line in enumerate(file):
+                you, user_identification_done = process_line(
+                    line, index, chat, path, you,
+                    assume_first_as_me, user_identification_done
+                )
+                pbar.update(1)
+            total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} messages & media in {convert_time_unit(total_time)}{CLEAR_LINE}")

-            # Show progress
-            if index % 1000 == 0:
-                logger.info(f"Processing messages & media...({index}/{total_row_number})\r")
-
-    logger.info(f"Processed {total_row_number} messages & media{CLEAR_LINE}")
    return data
@@ -4,12 +4,13 @@ import os
import logging
import shutil
from glob import glob
+from tqdm import tqdm
from pathlib import Path
from mimetypes import MimeTypes
from markupsafe import escape as htmle
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
-from Whatsapp_Chat_Exporter.utility import APPLE_TIME, CLEAR_LINE, CURRENT_TZ_OFFSET, get_chat_condition
-from Whatsapp_Chat_Exporter.utility import bytes_to_readable, convert_time_unit, safe_name, Device
+from Whatsapp_Chat_Exporter.utility import APPLE_TIME, CLEAR_LINE, get_chat_condition, Device
+from Whatsapp_Chat_Exporter.utility import bytes_to_readable, convert_time_unit, safe_name


logger = logging.getLogger(__name__)
@@ -23,17 +24,18 @@ def contacts(db, data):
    logger.info(f"Pre-processing contacts...({total_row_number})\r")

    c.execute("""SELECT ZWHATSAPPID, ZABOUTTEXT FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""")
-    content = c.fetchone()
-    while content is not None:
-        zwhatsapp_id = content["ZWHATSAPPID"]
-        if not zwhatsapp_id.endswith("@s.whatsapp.net"):
-            zwhatsapp_id += "@s.whatsapp.net"
+    with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            zwhatsapp_id = content["ZWHATSAPPID"]
+            if not zwhatsapp_id.endswith("@s.whatsapp.net"):
+                zwhatsapp_id += "@s.whatsapp.net"

-        current_chat = ChatStore(Device.IOS)
-        current_chat.status = content["ZABOUTTEXT"]
-        data.add_chat(zwhatsapp_id, current_chat)
-        content = c.fetchone()
-    logger.info(f"Pre-processed {total_row_number} contacts{CLEAR_LINE}")
+            current_chat = ChatStore(Device.IOS)
+            current_chat.status = content["ZABOUTTEXT"]
+            data.add_chat(zwhatsapp_id, current_chat)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Pre-processed {total_row_number} contacts in {convert_time_unit(total_time)}{CLEAR_LINE}")


def process_contact_avatars(current_chat, media_folder, contact_id):
@@ -92,7 +94,6 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
    """
    c.execute(contact_query)
    total_row_number = c.fetchone()[0]
-    logger.info(f"Processing contacts...({total_row_number})\r")

    # Get distinct contacts
    contacts_query = f"""
@@ -114,24 +115,24 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
    c.execute(contacts_query)

    # Process each contact
-    content = c.fetchone()
-    while content is not None:
-        contact_name = get_contact_name(content)
-        contact_id = content["ZCONTACTJID"]
+    with tqdm(total=total_row_number, desc="Processing contacts", unit="contact", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            contact_name = get_contact_name(content)
+            contact_id = content["ZCONTACTJID"]

-        # Add or update chat
-        if contact_id not in data:
-            current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder))
-        else:
-            current_chat = data.get_chat(contact_id)
-            current_chat.name = contact_name
-        current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg")
+            # Add or update chat
+            if contact_id not in data:
+                current_chat = data.add_chat(contact_id, ChatStore(Device.IOS, contact_name, media_folder))
+            else:
+                current_chat = data.get_chat(contact_id)
+                current_chat.name = contact_name
+            current_chat.my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg")

-        # Process avatar images
-        process_contact_avatars(current_chat, media_folder, contact_id)
-        content = c.fetchone()
-
-    logger.info(f"Processed {total_row_number} contacts{CLEAR_LINE}")
+            # Process avatar images
+            process_contact_avatars(current_chat, media_folder, contact_id)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} contacts in {convert_time_unit(total_time)}{CLEAR_LINE}")

    # Get message count
    message_count_query = f"""
@@ -190,46 +191,42 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat,
        message_map = {row[0][:17]: row[1] or row[2] for row in cursor2.fetchall() if row[0]}

    # Process each message
-    i = 0
-    content = c.fetchone()
-    while content is not None:
-        contact_id = content["ZCONTACTJID"]
-        message_pk = content["Z_PK"]
-        is_group_message = content["ZGROUPINFO"] is not None
+    with tqdm(total=total_row_number, desc="Processing messages", unit="msg", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            contact_id = content["ZCONTACTJID"]
+            message_pk = content["Z_PK"]
+            is_group_message = content["ZGROUPINFO"] is not None

-        # Ensure chat exists
-        if contact_id not in data:
-            current_chat = data.add_chat(contact_id, ChatStore(Device.IOS))
-            process_contact_avatars(current_chat, media_folder, contact_id)
-        else:
-            current_chat = data.get_chat(contact_id)
+            # Ensure chat exists
+            if contact_id not in data:
+                current_chat = data.add_chat(contact_id, ChatStore(Device.IOS))
+                process_contact_avatars(current_chat, media_folder, contact_id)
+            else:
+                current_chat = data.get_chat(contact_id)

-        # Create message object
-        ts = APPLE_TIME + content["ZMESSAGEDATE"]
-        message = Message(
-            from_me=content["ZISFROMME"],
-            timestamp=ts,
-            time=ts,
-            key_id=content["ZSTANZAID"][:17],
-            timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET,
-            message_type=content["ZMESSAGETYPE"],
-            received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None,
-            read_timestamp=None  # TODO: Add timestamp
-        )
+            # Create message object
+            ts = APPLE_TIME + content["ZMESSAGEDATE"]
+            message = Message(
+                from_me=content["ZISFROMME"],
+                timestamp=ts,
+                time=ts,
+                key_id=content["ZSTANZAID"][:17],
+                timezone_offset=timezone_offset,
+                message_type=content["ZMESSAGETYPE"],
+                received_timestamp=APPLE_TIME + content["ZSENTDATE"] if content["ZSENTDATE"] else None,
+                read_timestamp=None  # TODO: Add timestamp
+            )

-        # Process message data
-        invalid = process_message_data(message, content, is_group_message, data, message_map, no_reply)
+            # Process message data
+            invalid = process_message_data(message, content, is_group_message, data, message_map, no_reply)

-        # Add valid messages to chat
-        if not invalid:
-            current_chat.add_message(message_pk, message)
+            # Add valid messages to chat
+            if not invalid:
+                current_chat.add_message(message_pk, message)

-        # Update progress
-        i += 1
-        if i % 1000 == 0:
-            logger.info(f"Processing messages...({i}/{total_row_number})\r")
-        content = c.fetchone()
-    logger.info(f"Processed {total_row_number} messages{CLEAR_LINE}")
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} messages in {convert_time_unit(total_time)}{CLEAR_LINE}")


def process_message_data(message, content, is_group_message, data, message_map, no_reply):
@@ -315,7 +312,7 @@ def process_message_text(message, content):
        message.data = msg


-def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=False):
+def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=False, fix_dot_files=False):
    """Process media files from WhatsApp messages."""
    c = db.cursor()

@@ -371,20 +368,15 @@ def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=False):

    # Process each media item
    mime = MimeTypes()
-    i = 0
-    content = c.fetchone()
-    while content is not None:
-        process_media_item(content, data, media_folder, mime, separate_media)
-
-        # Update progress
-        i += 1
-        if i % 100 == 0:
-            logger.info(f"Processing media...({i}/{total_row_number})\r")
-        content = c.fetchone()
-    logger.info(f"Processed {total_row_number} media{CLEAR_LINE}")
+    with tqdm(total=total_row_number, desc="Processing media", unit="media", leave=False) as pbar:
+        while (content := c.fetchone()) is not None:
+            process_media_item(content, data, media_folder, mime, separate_media, fix_dot_files)
+            pbar.update(1)
+        total_time = pbar.format_dict['elapsed']
+    logger.info(f"Processed {total_row_number} media in {convert_time_unit(total_time)}{CLEAR_LINE}")


-def process_media_item(content, data, media_folder, mime, separate_media):
+def process_media_item(content, data, media_folder, mime, separate_media, fix_dot_files=False):
    """Process a single media item."""
    file_path = f"{media_folder}/Message/{content['ZMEDIALOCALPATH']}"
    current_chat = data.get_chat(content["ZCONTACTJID"])
@@ -395,14 +387,22 @@ def process_media_item(content, data, media_folder, mime, separate_media):
        current_chat.media_base = media_folder + "/"

    if os.path.isfile(file_path):
-        message.data = '/'.join(file_path.split("/")[1:])
-
        # Set MIME type
        if content["ZVCARDSTRING"] is None:
            guess = mime.guess_type(file_path)[0]
            message.mime = guess if guess is not None else "application/octet-stream"
        else:
            message.mime = content["ZVCARDSTRING"]

+        if fix_dot_files and file_path.endswith("."):
+            extension = mime.guess_extension(message.mime)
+            if message.mime == "application/octet-stream" or not extension:
+                new_file_path = file_path[:-1]
+            else:
+                extension = mime.guess_extension(message.mime)
+                new_file_path = file_path[:-1] + extension
+            os.rename(file_path, new_file_path)
+            file_path = new_file_path

        # Handle separate media option
        if separate_media:
@@ -413,7 +413,9 @@ def process_media_item(content, data, media_folder, mime, separate_media):
            Path(new_folder).mkdir(parents=True, exist_ok=True)
            new_path = os.path.join(new_folder, current_filename)
            shutil.copy2(file_path, new_path)
-            message.data = '/'.join(new_path.split("\\")[1:])
+            message.data = '/'.join(new_path.split("/")[1:])
+        else:
+            message.data = '/'.join(file_path.split("/")[1:])
    else:
        # Handle missing media
        message.data = "The media is missing"
@@ -467,10 +469,12 @@ def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty):
    Path(path).mkdir(parents=True, exist_ok=True)

    # Process each vCard
    for index, content in enumerate(contents):
        process_vcard_item(content, path, data)
        logger.info(f"Processing vCards...({index + 1}/{total_row_number})\r")
    logger.info(f"Processed {total_row_number} vCards{CLEAR_LINE}")
    with tqdm(total=total_row_number, desc="Processing vCards", unit="vcard", leave=False) as pbar:
        for content in contents:
            process_vcard_item(content, path, data)
            pbar.update(1)
        total_time = pbar.format_dict['elapsed']
    logger.info(f"Processed {total_row_number} vCards in {convert_time_unit(total_time)}{CLEAR_LINE}")
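`pbar.format_dict` exposes tqdm's live state; the `'elapsed'` entry is the wall-clock runtime in seconds, which is why the hunks above read it before the `with` block closes the bar. `convert_time_unit` itself is not shown in this diff, so the formatter below is only a plausible stand-in:

```python
# Reading the elapsed time out of a tqdm bar before it closes.
# humanize_seconds is a hypothetical equivalent of convert_time_unit.
from time import sleep
from tqdm import tqdm

def humanize_seconds(seconds: float) -> str:
    minutes, secs = divmod(int(seconds), 60)
    return f"{minutes}m {secs}s" if minutes else f"{secs}s"

with tqdm(total=3, desc="Demo", unit="item", leave=False) as pbar:
    for _ in range(3):
        sleep(0.1)
        pbar.update(1)
    total_time = pbar.format_dict['elapsed']

print(f"Processed 3 items in {humanize_seconds(total_time)}")
```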
def process_vcard_item(content, path, data):
@@ -530,8 +534,6 @@ def calls(db, data, timezone_offset, filter_chat):
    if total_row_number == 0:
        return

    logger.info(f"Processed {total_row_number} calls{CLEAR_LINE}\n")

    # Fetch call records
    calls_query = f"""
        SELECT ZCALLIDSTRING,
@@ -556,14 +558,15 @@ def calls(db, data, timezone_offset, filter_chat):
    # Create calls chat
    chat = ChatStore(Device.ANDROID, "WhatsApp Calls")

    # Process each call
    content = c.fetchone()
    while content is not None:
        process_call_record(content, chat, data, timezone_offset)
        content = c.fetchone()
    with tqdm(total=total_row_number, desc="Processing calls", unit="call", leave=False) as pbar:
        while (content := c.fetchone()) is not None:
            process_call_record(content, chat, data, timezone_offset)
            pbar.update(1)
        total_time = pbar.format_dict['elapsed']

    # Add calls chat to data
    data.add_chat("000000000000000", chat)
    logger.info(f"Processed {total_row_number} calls in {convert_time_unit(total_time)}{CLEAR_LINE}")
def process_call_record(content, chat, data, timezone_offset):

@@ -574,7 +577,7 @@ def process_call_record(content, chat, data, timezone_offset):
        timestamp=ts,
        time=ts,
        key_id=content["ZCALLIDSTRING"],
        timezone_offset=timezone_offset if timezone_offset else CURRENT_TZ_OFFSET
        timezone_offset=timezone_offset
    )

    # Set sender info
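With this hunk `process_call_record` no longer applies the `CURRENT_TZ_OFFSET` fallback itself; the offset is expected to be resolved once by the caller and passed through. A sketch of that resolve-once idea; the helper name is ours and the offset constant is computed locally for the demo:

```python
# Resolve the timezone offset once instead of re-checking it per record.
# resolve_tz_offset is a hypothetical helper, not part of the project.
from datetime import datetime, timezone

# Offset of the local machine from UTC, in hours (computed here for the demo;
# the project keeps its own CURRENT_TZ_OFFSET constant in utility.py).
LOCAL_TZ_OFFSET = datetime.now(timezone.utc).astimezone().utcoffset().total_seconds() / 3600

def resolve_tz_offset(user_offset=None):
    return user_offset if user_offset is not None else LOCAL_TZ_OFFSET

print(resolve_tz_offset(None))  # falls back to the machine offset
print(resolve_tz_offset(5.5))   # an explicit user choice wins
```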
@@ -6,7 +6,9 @@ import sqlite3
import os
import getpass
from sys import exit, platform as osname
from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, WhatsAppIdentifier
import sys
from tqdm import tqdm
from Whatsapp_Chat_Exporter.utility import CLEAR_LINE, WhatsAppIdentifier, convert_time_unit
from Whatsapp_Chat_Exporter.bplist import BPListReader
try:
    from iphone_backup_decrypt import EncryptedBackup, RelativePath
@@ -79,6 +81,8 @@ class BackupExtractor:

        logger.info(f"Encryption detected on the backup!{CLEAR_LINE}")
        password = getpass.getpass("Enter the password for the backup:")
        sys.stdout.write("\033[F\033[K")
        sys.stdout.flush()
        self._decrypt_backup(password)
        self._extract_decrypted_files()
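The two new `sys.stdout` lines wipe the password prompt off the terminal once it has been answered: `\033[F` moves the cursor up one line and `\033[K` erases it. A tiny demonstration:

```python
# ESC[F = cursor up one line, ESC[K = erase that line: removes the
# getpass prompt from the scrollback after the user has typed the password.
import sys
import getpass

getpass.getpass("Enter a throwaway value:")
sys.stdout.write("\033[F\033[K")  # jump back onto the prompt line and clear it
sys.stdout.flush()
print("Prompt line removed from the terminal.")
```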
@@ -89,7 +93,7 @@ class BackupExtractor:
        Args:
            password (str): The password for the encrypted backup.
        """
        logger.info(f"Trying to decrypt the iOS backup...{CLEAR_LINE}")
        logger.info(f"Trying to open the iOS backup...{CLEAR_LINE}")
        self.backup = EncryptedBackup(
            backup_directory=self.base_dir,
            passphrase=password,
@@ -97,7 +101,7 @@ class BackupExtractor:
            check_same_thread=False,
            decrypt_chunk_size=self.decrypt_chunk_size,
        )
        logger.info(f"iOS backup decrypted successfully{CLEAR_LINE}")
        logger.info(f"iOS backup is opened successfully{CLEAR_LINE}")
        logger.info("Decrypting WhatsApp database...\r")
        try:
            self.backup.extract_file(
@@ -130,9 +134,12 @@ class BackupExtractor:

    def _extract_decrypted_files(self):
        """Extract all WhatsApp files after decryption"""
        pbar = tqdm(desc="Decrypting and extracting files", unit="file", leave=False)
        def extract_progress_handler(file_id, domain, relative_path, n, total_files):
            if n % 100 == 0:
                logger.info(f"Decrypting and extracting files...({n}/{total_files})\r")
            if pbar.total is None:
                pbar.total = total_files
            pbar.n = n
            pbar.refresh()
            return True

        self.backup.extract_files(
@@ -141,7 +148,9 @@ class BackupExtractor:
            preserve_folders=True,
            filter_callback=extract_progress_handler
        )
        logger.info(f"All required files are decrypted and extracted.{CLEAR_LINE}")
        total_time = pbar.format_dict['elapsed']
        pbar.close()
        logger.info(f"All required files are decrypted and extracted in {convert_time_unit(total_time)}{CLEAR_LINE}")
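Because `extract_files` reports progress through a callback rather than an iterable, the bar is created without a context manager and without a total; the handler fills in `total` on its first call and sets `n` directly. A minimal reproduction of the pattern with a faked extraction loop:

```python
# tqdm driven from a callback: the total is unknown until the callback
# first fires, and n is assigned directly instead of via update().
# fake_extract_files stands in for EncryptedBackup.extract_files.
from tqdm import tqdm

def fake_extract_files(filter_callback):
    total = 42
    for n in range(1, total + 1):
        filter_callback(None, "AppDomainGroup-demo", f"file{n}", n, total)

pbar = tqdm(desc="Decrypting and extracting files", unit="file", leave=False)

def extract_progress_handler(file_id, domain, relative_path, n, total_files):
    if pbar.total is None:  # first call: the total is now known
        pbar.total = total_files
    pbar.n = n              # jump the bar straight to the reported position
    pbar.refresh()
    return True             # keep this file in the extraction set

fake_extract_files(extract_progress_handler)
total_time = pbar.format_dict['elapsed']
pbar.close()  # closed manually since no with-block owns the bar
```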
    def _extract_unencrypted_backup(self):
        """
@@ -192,7 +201,6 @@ class BackupExtractor:
        c = manifest.cursor()
        c.execute(f"SELECT count() FROM Files WHERE domain = '{_wts_id}'")
        total_row_number = c.fetchone()[0]
        logger.info(f"Extracting WhatsApp files...(0/{total_row_number})\r")
        c.execute(
            f"""
            SELECT fileID, relativePath, flags, file AS metadata,
@@ -205,33 +213,30 @@ class BackupExtractor:
        if not os.path.isdir(_wts_id):
            os.mkdir(_wts_id)

        row = c.fetchone()
        while row is not None:
            if not row["relativePath"]:  # Skip empty relative paths
                row = c.fetchone()
                continue
        with tqdm(total=total_row_number, desc="Extracting WhatsApp files", unit="file", leave=False) as pbar:
            while (row := c.fetchone()) is not None:
                if not row["relativePath"]:  # Skip empty relative paths
                    continue

            destination = os.path.join(_wts_id, row["relativePath"])
            hashes = row["fileID"]
            folder = hashes[:2]
            flags = row["flags"]
                destination = os.path.join(_wts_id, row["relativePath"])
                hashes = row["fileID"]
                folder = hashes[:2]
                flags = row["flags"]

            if flags == 2:  # Directory
                try:
                    os.mkdir(destination)
                except FileExistsError:
                    pass
            elif flags == 1:  # File
                shutil.copyfile(os.path.join(self.base_dir, folder, hashes), destination)
                metadata = BPListReader(row["metadata"]).parse()
                creation = metadata["$objects"][1]["Birth"]
                modification = metadata["$objects"][1]["LastModified"]
                os.utime(destination, (modification, modification))

            if row["_index"] % 100 == 0:
                logger.info(f"Extracting WhatsApp files...({row['_index']}/{total_row_number})\r")
            row = c.fetchone()
        logger.info(f"Extracted WhatsApp files...({total_row_number}){CLEAR_LINE}")
                if flags == 2:  # Directory
                    try:
                        os.mkdir(destination)
                    except FileExistsError:
                        pass
                elif flags == 1:  # File
                    shutil.copyfile(os.path.join(self.base_dir, folder, hashes), destination)
                    metadata = BPListReader(row["metadata"]).parse()
                    _creation = metadata["$objects"][1]["Birth"]
                    modification = metadata["$objects"][1]["LastModified"]
                    os.utime(destination, (modification, modification))
                pbar.update(1)
            total_time = pbar.format_dict['elapsed']
        logger.info(f"Extracted {total_row_number} WhatsApp files in {convert_time_unit(total_time)}{CLEAR_LINE}")
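For the unencrypted path, each Manifest.db record is copied out of the backup's hash-addressed store (a file lives at `<fileID[:2]>/<fileID>` under the backup directory) and the modification time recorded in the bplist metadata is restored. A compact sketch of just that copy step; the function name and values are illustrative:

```python
# Restore one Manifest.db record from an iTunes-style backup layout:
# files sit at <base_dir>/<fileID[:2]>/<fileID>; flags 2 marks a directory
# and flags 1 a regular file. restore_record is a hypothetical helper.
import os
import shutil

def restore_record(base_dir, dest_root, file_id, relative_path, flags, modification):
    destination = os.path.join(dest_root, relative_path)
    if flags == 2:  # directory entry: just make sure it exists
        os.makedirs(destination, exist_ok=True)
    elif flags == 1:  # file entry: copy out of the hashed store
        shutil.copyfile(os.path.join(base_dir, file_id[:2], file_id), destination)
        # reapply the LastModified timestamp from the record's bplist metadata
        os.utime(destination, (modification, modification))
```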
def extract_media(base_dir, identifiers, decrypt_chunk_size):
@@ -5,13 +5,13 @@ import json
import os
import unicodedata
import re
import string
import math
import shutil
from bleach import clean as sanitize
from markupsafe import Markup
from datetime import datetime, timedelta
from enum import IntEnum
from tqdm import tqdm
from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore, Timing
from typing import Dict, List, Optional, Tuple, Union
try:
@@ -248,13 +248,13 @@ def import_from_json(json_file: str, data: ChatCollection):
    with open(json_file, "r") as f:
        temp_data = json.loads(f.read())
    total_row_number = len(tuple(temp_data.keys()))
    logger.info(f"Importing chats from JSON...(0/{total_row_number})\r")
    for index, (jid, chat_data) in enumerate(temp_data.items()):
        chat = ChatStore.from_json(chat_data)
        data.add_chat(jid, chat)
        logger.info(
            f"Importing chats from JSON...({index + 1}/{total_row_number})\r")
    logger.info(f"Imported {total_row_number} chats from JSON{CLEAR_LINE}")
    with tqdm(total=total_row_number, desc="Importing chats from JSON", unit="chat", leave=False) as pbar:
        for jid, chat_data in temp_data.items():
            chat = ChatStore.from_json(chat_data)
            data.add_chat(jid, chat)
            pbar.update(1)
        total_time = pbar.format_dict['elapsed']
    logger.info(f"Imported {total_row_number} chats from JSON in {convert_time_unit(total_time)}{CLEAR_LINE}")


def incremental_merge(source_dir: str, target_dir: str, media_dir: str, pretty_print_json: int, avoid_encoding_json: bool):
@@ -439,7 +439,7 @@ CRYPT14_OFFSETS = (
    {"iv": 67, "db": 193},
    {"iv": 67, "db": 194},
    {"iv": 67, "db": 158},
    {"iv": 67, "db": 196}
    {"iv": 67, "db": 196},
)
@@ -534,7 +534,7 @@ def determine_metadata(content: sqlite3.Row, init_msg: Optional[str]) -> Optiona
        else:
            msg = "The security code in this chat changed"
    elif content["action_type"] == 58:
        msg = "You blocked this contact"
        msg = "You blocked/unblocked this contact"
    elif content["action_type"] == 67:
        return  # (PM) this contact use secure service from Facebook???
    elif content["action_type"] == 69:
@@ -639,11 +639,17 @@ def get_from_string(msg: Dict, chat_id: str) -> str:

def get_chat_type(chat_id: str) -> str:
    """Return the chat type based on the whatsapp id"""
    if chat_id.endswith("@s.whatsapp.net"):
    if chat_id == "000000000000000":
        return "calls"
    elif chat_id.endswith("@s.whatsapp.net"):
        return "personal_chat"
    if chat_id.endswith("@g.us"):
    elif chat_id.endswith("@g.us"):
        return "private_group"
    logger.warning("Unknown chat type for %s, defaulting to private_group", chat_id)
    elif chat_id == "status@broadcast":
        return "status_broadcast"
    elif chat_id.endswith("@broadcast"):
        return "broadcast_channel"
    logger.warning(f"Unknown chat type for {chat_id}, defaulting to private_group{CLEAR_LINE}")
    return "private_group"
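After this change the function recognises the synthetic calls chat, status broadcasts, and broadcast channels before falling back to `private_group`. Expected results for a few made-up ids:

```python
# Expected chat-type mapping after this hunk (the JIDs are invented).
from Whatsapp_Chat_Exporter.utility import get_chat_type

assert get_chat_type("000000000000000") == "calls"             # synthetic calls chat
assert get_chat_type("15551234567@s.whatsapp.net") == "personal_chat"
assert get_chat_type("123456789@g.us") == "private_group"
assert get_chat_type("status@broadcast") == "status_broadcast"
assert get_chat_type("12345@broadcast") == "broadcast_channel"
assert get_chat_type("something-else") == "private_group"      # logged fallback
```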
@@ -674,34 +680,35 @@ def telegram_json_format(jik: str, data: Dict, timezone_offset) -> Dict:
    except ValueError:
        # not a real chat: e.g. statusbroadcast
        chat_id = 0
    obj = {
        "name": data["name"] if data["name"] else jik,
        "type": get_chat_type(jik),
        "id": chat_id,
        "messages": [ {
            "id": int(msgId),
            "type": "message",
            "date": timing.format_timestamp(msg["timestamp"], "%Y-%m-%dT%H:%M:%S"),
            "date_unixtime": int(msg["timestamp"]),
            "from": get_from_string(msg, chat_id),
            "from_id": get_from_id(msg, chat_id),
            "reply_to_message_id": get_reply_id(data, msg["reply"]),
            "text": msg["data"],
            "text_entities": [
                {
                    # TODO this will lose formatting and different types
                    "type": "plain",
                    "text": msg["data"],
                }
            ],
        } for msgId, msg in data["messages"].items()]
    json_obj = {
        "name": data["name"] if data["name"] else jik,
        "type": get_chat_type(jik),
        "id": chat_id,
        "messages": [ {
            "id": int(msgId),
            "type": "message",
            "date": timing.format_timestamp(msg["timestamp"], "%Y-%m-%dT%H:%M:%S"),
            "date_unixtime": int(msg["timestamp"]),
            "from": get_from_string(msg, chat_id),
            "from_id": get_from_id(msg, chat_id),
            "reply_to_message_id": get_reply_id(data, msg["reply"]),
            "text": msg["data"],
            "text_entities": [
                {
                    # TODO this will lose formatting and different types
                    "type": "plain",
                    "text": msg["data"],
                }
            ],
        }
        for msgId, msg in data["messages"].items()]
    }
    # remove empty messages and replies
    for msg_id, msg in enumerate(obj["messages"]):
    for msg_id, msg in enumerate(json_obj["messages"]):
        if not msg["reply_to_message_id"]:
            del obj["messages"][msg_id]["reply_to_message_id"]
    obj["messages"] = [m for m in obj["messages"] if m["text"]]
    return obj
            del json_obj["messages"][msg_id]["reply_to_message_id"]
    json_obj["messages"] = [m for m in json_obj["messages"] if m["text"]]
    return json_obj
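For reference, one entry of `json_obj["messages"]` ends up shaped like the Telegram export format below (all values invented; `text_entities` always carries a single `plain` entity, per the TODO above):

```python
# One invented json_obj["messages"] entry as built by telegram_json_format.
sample_message = {
    "id": 42,
    "type": "message",
    "date": "2024-05-01T12:34:56",
    "date_unixtime": 1714566896,
    "from": "Alice",               # whatever get_from_string returns
    "from_id": "user15551234567",  # whatever get_from_id returns
    "reply_to_message_id": 41,     # key deleted afterwards when falsy
    "text": "Hello!",
    "text_entities": [{"type": "plain", "text": "Hello!"}],
}
```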
class WhatsAppIdentifier(StrEnum):
@@ -281,7 +281,9 @@
            {% filter escape %}{{ msg.data }}{% endfilter %}
        {% endif %}
        {% if msg.caption is not none %}
            {{ msg.caption | urlize(none, true, '_blank') }}
            <p class='mt-1 {% if "audio/" in msg.mime %}text-[#808080]{% endif %}'>
                {{ msg.caption | urlize(none, true, '_blank') }}
            </p>
        {% endif %}
    {% endif %}
{% endif %}
@@ -351,7 +353,9 @@
            {% filter escape %}{{ msg.data }}{% endfilter %}
        {% endif %}
        {% if msg.caption is not none %}
            {{ msg.caption | urlize(none, true, '_blank') }}
            <p class='mt-1 {% if "audio/" in msg.mime %}text-[#808080]{% endif %}'>
                {{ msg.caption | urlize(none, true, '_blank') }}
            </p>
        {% endif %}
    {% endif %}
{% endif %}
@@ -36,17 +36,19 @@ classifiers = [
requires-python = ">=3.10"
dependencies = [
    "jinja2",
    "bleach"
    "bleach",
    "tqdm"
]

[project.optional-dependencies]
android_backup = ["pycryptodome", "javaobj-py3"]
ios_backup = ["iphone_backup_decrypt @ git+https://github.com/KnugiHK/iphone_backup_decrypt"]
crypt12 = ["pycryptodome"]
crypt14 = ["pycryptodome"]
crypt15 = ["pycryptodome", "javaobj-py3"]
all = ["pycryptodome", "javaobj-py3"]
everything = ["pycryptodome", "javaobj-py3"]
backup = ["pycryptodome", "javaobj-py3"]
all = ["pycryptodome", "javaobj-py3", "iphone_backup_decrypt @ git+https://github.com/KnugiHK/iphone_backup_decrypt"]
everything = ["pycryptodome", "javaobj-py3", "iphone_backup_decrypt @ git+https://github.com/KnugiHK/iphone_backup_decrypt"]
backup = ["pycryptodome", "javaobj-py3", "iphone_backup_decrypt @ git+https://github.com/KnugiHK/iphone_backup_decrypt"]

[project.scripts]
wtsexporter = "Whatsapp_Chat_Exporter.__main__:main"
tests/conftest.py (new file, 27 lines)
@@ -0,0 +1,27 @@
import pytest
import os


def pytest_collection_modifyitems(config, items):
    """
    Moves test_nuitka_binary.py to the end and fails if the file is missing.
    """
    target_file = "test_nuitka_binary.py"

    # Sanity Check: Ensure the file actually exists in the tests directory
    test_dir = os.path.join(config.rootdir, "tests")
    file_path = os.path.join(test_dir, target_file)

    if not os.path.exists(file_path):
        pytest.exit(f"\n[FATAL] Required test file '{target_file}' not found in {test_dir}. "
                    f"Order enforcement failed!", returncode=1)

    nuitka_tests = []
    remaining_tests = []

    for item in items:
        if target_file in item.nodeid:
            nuitka_tests.append(item)
        else:
            remaining_tests.append(item)

    items[:] = remaining_tests + nuitka_tests