From 23af55d6454886114ad5883b5812399ba03f96d7 Mon Sep 17 00:00:00 2001 From: KnugiHK <24708955+KnugiHK@users.noreply.github.com> Date: Sat, 4 Jan 2025 18:18:34 +0800 Subject: [PATCH] Implement empty chat filtering from SQL #112 This commit also removed the old empty chat filtering logic. --- Whatsapp_Chat_Exporter/__main__.py | 20 ++++++-------- Whatsapp_Chat_Exporter/android_handler.py | 33 ++++++++++++++++------- Whatsapp_Chat_Exporter/ios_handler.py | 6 ++--- Whatsapp_Chat_Exporter/utility.py | 10 +++---- 4 files changed, 39 insertions(+), 30 deletions(-) diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index fcf231c..31ef6cf 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -17,8 +17,8 @@ else: from Whatsapp_Chat_Exporter import exported_handler, android_handler from Whatsapp_Chat_Exporter import ios_handler, ios_media_handler from Whatsapp_Chat_Exporter.data_model import ChatStore -from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType, chat_is_empty, readable_to_bytes -from Whatsapp_Chat_Exporter.utility import check_update, import_from_json, sanitize_filename, bytes_to_readable +from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType, readable_to_bytes, check_update +from Whatsapp_Chat_Exporter.utility import import_from_json, sanitize_filename, bytes_to_readable from argparse import ArgumentParser, SUPPRESS from datetime import datetime from sys import exit @@ -254,7 +254,9 @@ def main(): dest="filter_empty", default=True, action='store_false', - help="By default, the exporter will not render chats with no valid message. Setting this flag will cause the exporter to render those." + help=("By default, the exporter will not render chats with no valid message. " + "Setting this flag will cause the exporter to render those. " + "This is useful if chat(s) are missing from the output") ) parser.add_argument( "--per-chat", @@ -504,9 +506,9 @@ def main(): if os.path.isfile(msg_db): with sqlite3.connect(msg_db) as db: db.row_factory = sqlite3.Row - messages(db, data, args.media, args.timezone_offset, args.filter_date, filter_chat) - media(db, data, args.media, args.filter_date, filter_chat, args.separate_media) - vcard(db, data, args.media, args.filter_date, filter_chat) + messages(db, data, args.media, args.timezone_offset, args.filter_date, filter_chat, args.filter_empty) + media(db, data, args.media, args.filter_date, filter_chat, args.filter_empty, args.separate_media) + vcard(db, data, args.media, args.filter_date, filter_chat, args.filter_empty) if args.android: android_handler.calls(db, data, args.timezone_offset, filter_chat) elif args.ios and args.call_db_ios is not None: @@ -525,7 +527,6 @@ def main(): args.offline, args.size, args.no_avatar, - args.filter_empty, args.whatsapp_theme ) else: @@ -563,7 +564,6 @@ def main(): args.offline, args.size, args.no_avatar, - args.filter_empty, args.whatsapp_theme ) for file in glob.glob(r'*.*'): @@ -578,7 +578,6 @@ def main(): args.offline, args.size, args.no_avatar, - args.filter_empty, args.whatsapp_theme ) @@ -587,9 +586,6 @@ def main(): android_handler.create_txt(data, args.text_format) if args.json and not args.import_json: - if args.filter_empty: - data = {k: v for k, v in data.items() if not chat_is_empty(v)} - if args.enrich_from_vcards is not None and not contact_store.is_empty(): contact_store.enrich_from_vcards(data) diff --git a/Whatsapp_Chat_Exporter/android_handler.py b/Whatsapp_Chat_Exporter/android_handler.py index 44a3724..38a3e3c 100644 --- a/Whatsapp_Chat_Exporter/android_handler.py +++ b/Whatsapp_Chat_Exporter/android_handler.py @@ -12,10 +12,10 @@ from hashlib import sha256 from base64 import b64decode, b64encode from datetime import datetime from Whatsapp_Chat_Exporter.data_model import ChatStore, Message -from Whatsapp_Chat_Exporter.utility import CURRENT_TZ_OFFSET, MAX_SIZE, ROW_SIZE, DbType, convert_time_unit, determine_metadata -from Whatsapp_Chat_Exporter.utility import rendering, Crypt, Device, get_file_name, setup_template, JidType +from Whatsapp_Chat_Exporter.utility import CURRENT_TZ_OFFSET, MAX_SIZE, ROW_SIZE, DbType, convert_time_unit, determine_metadata, get_cond_for_empty +from Whatsapp_Chat_Exporter.utility import rendering, Crypt, Device, get_file_name, setup_template from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS, get_status_location -from Whatsapp_Chat_Exporter.utility import get_chat_condition, slugify, bytes_to_readable, chat_is_empty +from Whatsapp_Chat_Exporter.utility import get_chat_condition, slugify, bytes_to_readable, JidType try: import zlib @@ -173,7 +173,7 @@ def contacts(db, data): row = c.fetchone() -def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat): +def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, filter_empty): # Get message history c = db.cursor() try: @@ -181,7 +181,10 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat): FROM messages INNER JOIN jid ON messages.key_remote_jid = jid.raw_string + LEFT JOIN chat + ON chat.jid_row_id = jid._id WHERE 1=1 + {get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")} {f'AND timestamp {filter_date}' if filter_date is not None else ''} {get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")} {get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")}""") @@ -196,6 +199,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat): LEFT JOIN jid jid_group ON jid_group._id = message.sender_jid_row_id WHERE 1=1 + {get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")} {f'AND timestamp {filter_date}' if filter_date is not None else ''} {get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")} {get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")}""") @@ -253,6 +257,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat): LEFT JOIN receipt_user ON receipt_user.message_row_id = messages._id WHERE messages.key_remote_jid <> '-1' + {get_cond_for_empty(filter_empty, "messages.key_remote_jid", "messages.needs_push")} {f'AND messages.timestamp {filter_date}' if filter_date is not None else ''} {get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")} {get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")} @@ -321,6 +326,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat): LEFT JOIN receipt_user ON receipt_user.message_row_id = message._id WHERE key_remote_jid <> '-1' + {get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")} {f'AND message.timestamp {filter_date}' if filter_date is not None else ''} {get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")} {get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")} @@ -488,7 +494,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat): print(f"Processing messages...({total_row_number}/{total_row_number})", end="\r") -def media(db, data, media_folder, filter_date, filter_chat, separate_media=True): +def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=True): # Get media c = db.cursor() try: @@ -498,7 +504,10 @@ def media(db, data, media_folder, filter_date, filter_chat, separate_media=True) ON message_media.message_row_id = messages._id INNER JOIN jid ON messages.key_remote_jid = jid.raw_string + LEFT JOIN chat + ON chat.jid_row_id = jid._id WHERE 1=1 + {get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")} {f'AND messages.timestamp {filter_date}' if filter_date is not None else ''} {get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")} {get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")}""") @@ -514,6 +523,7 @@ def media(db, data, media_folder, filter_date, filter_chat, separate_media=True) LEFT JOIN jid jid_group ON jid_group._id = message.sender_jid_row_id WHERE 1=1 + {get_cond_for_empty(filter_empty, "jid.raw_string", "broadcast")} {f'AND message.timestamp {filter_date}' if filter_date is not None else ''} {get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")} {get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")}""") @@ -536,7 +546,10 @@ def media(db, data, media_folder, filter_date, filter_chat, separate_media=True) ON message_media.file_hash = media_hash_thumbnail.media_hash INNER JOIN jid ON messages.key_remote_jid = jid.raw_string + LEFT JOIN chat + ON chat.jid_row_id = jid._id WHERE jid.type <> 7 + {get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")} {f'AND messages.timestamp {filter_date}' if filter_date is not None else ''} {get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")} {get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")} @@ -563,6 +576,7 @@ def media(db, data, media_folder, filter_date, filter_chat, separate_media=True) LEFT JOIN jid jid_group ON jid_group._id = message.sender_jid_row_id WHERE jid.type <> 7 + {get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")} {f'AND message.timestamp {filter_date}' if filter_date is not None else ''} {get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")} {get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")} @@ -613,7 +627,7 @@ def media(db, data, media_folder, filter_date, filter_chat, separate_media=True) f"Processing media...({total_row_number}/{total_row_number})", end="\r") -def vcard(db, data, media_folder, filter_date, filter_chat): +def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty): c = db.cursor() try: c.execute(f"""SELECT message_row_id, @@ -625,7 +639,10 @@ def vcard(db, data, media_folder, filter_date, filter_chat): ON messages_vcards.message_row_id = messages._id INNER JOIN jid ON messages.key_remote_jid = jid.raw_string + LEFT JOIN chat + ON chat.jid_row_id = jid._id WHERE 1=1 + {get_cond_for_empty(filter_empty, "key_remote_jid", "messages.needs_push")} {f'AND messages.timestamp {filter_date}' if filter_date is not None else ''} {get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")} {get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")} @@ -646,6 +663,7 @@ def vcard(db, data, media_folder, filter_date, filter_chat): LEFT JOIN jid jid_group ON jid_group._id = message.sender_jid_row_id WHERE 1=1 + {get_cond_for_empty(filter_empty, "key_remote_jid", "broadcast")} {f'AND message.timestamp {filter_date}' if filter_date is not None else ''} {get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")} {get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")} @@ -760,7 +778,6 @@ def create_html( offline_static=False, maximum_size=None, no_avatar=False, - filter_empty=True, experimental=False ): template = setup_template(template, no_avatar, experimental) @@ -775,8 +792,6 @@ def create_html( for current, contact in enumerate(data): chat = data[contact] - if filter_empty and chat_is_empty(chat): - continue safe_file_name, name = get_file_name(contact, chat) if maximum_size is not None: diff --git a/Whatsapp_Chat_Exporter/ios_handler.py b/Whatsapp_Chat_Exporter/ios_handler.py index 8034eba..61964c7 100644 --- a/Whatsapp_Chat_Exporter/ios_handler.py +++ b/Whatsapp_Chat_Exporter/ios_handler.py @@ -27,7 +27,7 @@ def contacts(db, data): content = c.fetchone() -def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat): +def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat, filter_empty): c = db.cursor() # Get contacts c.execute( @@ -227,7 +227,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat): f"Processing messages...({total_row_number}/{total_row_number})", end="\r") -def media(db, data, media_folder, filter_date, filter_chat, separate_media=False): +def media(db, data, media_folder, filter_date, filter_chat, filter_empty, separate_media=False): c = db.cursor() # Get media c.execute(f"""SELECT count() @@ -308,7 +308,7 @@ def media(db, data, media_folder, filter_date, filter_chat, separate_media=False f"Processing media...({total_row_number}/{total_row_number})", end="\r") -def vcard(db, data, media_folder, filter_date, filter_chat): +def vcard(db, data, media_folder, filter_date, filter_chat, filter_empty): c = db.cursor() c.execute(f"""SELECT DISTINCT ZWAVCARDMENTION.ZMEDIAITEM, ZWAMEDIAITEM.ZMESSAGE, diff --git a/Whatsapp_Chat_Exporter/utility.py b/Whatsapp_Chat_Exporter/utility.py index 0a72c74..d7de1a4 100644 --- a/Whatsapp_Chat_Exporter/utility.py +++ b/Whatsapp_Chat_Exporter/utility.py @@ -220,6 +220,10 @@ def get_file_name(contact: str, chat: ChatStore): return sanitize_filename(file_name), name +def get_cond_for_empty(enable, jid_field: str, broadcast_field: str): + return f"AND (chat.sort_timestamp IS NOT NULL OR {jid_field}='status@broadcast' OR {broadcast_field}>0)" if enable else "" + + def get_chat_condition(filter, include, columns, jid=None, platform=None): if filter is not None: conditions = [] @@ -245,12 +249,6 @@ def get_chat_condition(filter, include, columns, jid=None, platform=None): else: return "" -def _is_message_empty(message): - return (message.data is None or message.data == "") and not message.media - -def chat_is_empty(chat: ChatStore): - return len(chat.messages) == 0 or all(_is_message_empty(message) for message in chat.messages.values()) - # Android Specific CRYPT14_OFFSETS = (