diff --git a/Whatsapp_Chat_Exporter/extract.py b/Whatsapp_Chat_Exporter/extract.py index 47efd50..1b72dc7 100644 --- a/Whatsapp_Chat_Exporter/extract.py +++ b/Whatsapp_Chat_Exporter/extract.py @@ -13,7 +13,8 @@ from mimetypes import MimeTypes from hashlib import sha256 from base64 import b64decode, b64encode from Whatsapp_Chat_Exporter.data_model import ChatStore, Message -from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, Device, rendering, sanitize_except, determine_day, Crypt +from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, Device, determine_metadata +from Whatsapp_Chat_Exporter.utility import rendering, sanitize_except, determine_day, Crypt from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS try: @@ -225,7 +226,11 @@ def messages(db, data, media_folder): jid_group.raw_string as group_sender_jid, chat.subject as chat_subject, missed_call_logs.video_call, - message.sender_jid_row_id + message.sender_jid_row_id, + message_system.action_type, + message_system_group.is_me_joined, + jid_old.raw_string as old_jid, + jid_new.raw_string as new_jid FROM message LEFT JOIN message_quoted ON message_quoted.message_row_id = message._id @@ -245,6 +250,16 @@ def messages(db, data, media_folder): ON jid_group._id = message.sender_jid_row_id LEFT JOIN missed_call_logs ON message._id = missed_call_logs.message_row_id + LEFT JOIN message_system + ON message_system.message_row_id = message._id + LEFT JOIN message_system_group + ON message_system_group.message_row_id = message._id + LEFT JOIN message_system_number_change + ON message_system_number_change.message_row_id = message._id + LEFT JOIN jid jid_old + ON jid_old._id = message_system_number_change.old_jid_row_id + LEFT JOIN jid jid_new + ON jid_new._id = message_system_number_change.new_jid_row_id WHERE key_remote_jid <> '-1';""" ) except Exception as e: @@ -266,8 +281,12 @@ def messages(db, data, media_folder): data[content["key_remote_jid"]] = ChatStore(Device.ANDROID, 
content["chat_subject"]) if content["key_remote_jid"] is None: continue # Not sure + if "sender_jid_row_id" in content: + sender_jid_row_id = content["sender_jid_row_id"] + else: + sender_jid_row_id = None message = Message( - from_me=content["key_from_me"], + from_me=not sender_jid_row_id and content["key_from_me"], timestamp=content["timestamp"], time=content["timestamp"], key_id=content["key_id"], @@ -283,7 +302,6 @@ def messages(db, data, media_folder): i += 1 content = c.fetchone() continue - invalid = False if "-" in content["key_remote_jid"] and content["key_from_me"] == 0: name = fallback = None if table_message: @@ -320,18 +338,18 @@ def messages(db, data, media_folder): message.caption = None if content["status"] == 6: # 6 = Metadata, otherwise assume a message - if (not table_message and "-" in content["key_remote_jid"]) or \ - (table_message and content["chat_subject"] is not None): + if not table_message and "-" in content["key_remote_jid"]: # Is Group if content["data"] is not None and content["data"] != "": try: int(content["data"]) except ValueError: - msg = f"The group name changed to {content['data']}" + msg = f'''The group name changed to "{content['data']}"''' message.data = msg message.meta = True else: - invalid = True + message.meta = True + message.data = None else: thumb_image = content["thumb_image"] # Not applicable for new schema if thumb_image is not None: @@ -344,7 +362,7 @@ def messages(db, data, media_folder): else: name_right = added.split('@')[0] if content["remote_resource"] is not None: - if content["remote_resource"] in data: + if content["remote_resource"] in data and data[content["remote_resource"]].name is not None: name_left = data[content["remote_resource"]].name else: name_left = content["remote_resource"].split('@')[0] @@ -360,7 +378,23 @@ def messages(db, data, media_folder): message.meta = True else: if content["data"] is None: - invalid = True + message.meta = True + message.data = None + + elif table_message: + 
message.meta = True + name = fallback = None + if content["sender_jid_row_id"] > 0: + _jid = content["group_sender_jid"] + if _jid in data: + name = data[_jid].name + if "@" in _jid: + fallback = _jid.split('@')[0] + else: + name = "You" + message.data = determine_metadata(content, name or fallback) + if isinstance(message.data, str) and "
" in message.data: + message.safe = True else: # Private chat if content["video_call"] is not None: # Missed call @@ -370,7 +404,8 @@ def messages(db, data, media_folder): elif content["video_call"] == 0: message.data = "A voice call was missed" elif content["data"] is None and content["thumb_image"] is None: - invalid = True # Unhandle metadata + message.meta = True + message.data = None else: # Real message if content["media_wa_type"] == 20: # Sticker is a message @@ -407,8 +442,7 @@ def messages(db, data, media_folder): msg = msg.replace("\n", "
# Action types rendered as "<actor><suffix>".
_ACTOR_SUFFIXES = {
    4: " was added to the group",
    5: " left the group",
    6: " changed the group icon",
    9: " created a broadcast channel",
    12: " added someone",  # TODO: Find out who
    14: " removed someone",  # TODO: Find out who
}

# Action types whose text never mentions the actor.
_FIXED_TEXTS = {
    7: "You were removed",
    8: ("WhatsApp Internal Error Occurred: "
        "you cannot send message to this group"),
    19: "This chat is now end-to-end encrypted",
    20: "Someone joined this group by using an invite link",  # TODO: Find out who
    47: "The contact is an official business account",
    50: "The contact's account type changed from business to standard",
    56: "Message timer was enabled/updated/disabled",
    58: "You blocked this contact",
}


def _number_change_text(content):
    """Return "<old> changed their number to <new>", or None if a JID is unusable."""
    try:
        old = content['old_jid'].split('@')[0]
        new = content['new_jid'].split('@')[0]
    except (AttributeError, IndexError):
        # AttributeError: one of the JIDs is None. IndexError is kept from the
        # original handler (presumably for row types that raise it on lookup).
        return None
    return f"{old} changed their number to {new}"


def determine_metadata(content, init_msg):
    """Build the human-readable text for a system (metadata) message.

    Args:
        content: Row-like mapping for the message. The keys read here are
            "is_me_joined", "action_type", "data", "old_jid" and "new_jid"
            ("data" and the two JIDs only for the action types that need them).
        init_msg: Name of the actor to mention in the text; may be None or
            empty when the name is unknown.

    Returns:
        The message text, or None when the action type is unsupported, is
        deliberately not rendered (13, 15, 46, 67, 69), or a number-change
        row lacks a usable old/new JID.
    """
    actor = init_msg if init_msg else ""

    if content["is_me_joined"] == 1:  # Overrides whatever the action type says
        if not actor:
            # Avoid a dangling "by " when the adder's name is unknown.
            return "You were added into the group"
        return f"You were added into the group by {actor}"

    action = content["action_type"]

    if action in _ACTOR_SUFFIXES:
        return actor + _ACTOR_SUFFIXES[action]

    if action in _FIXED_TEXTS:
        return _FIXED_TEXTS[action]

    if action == 1:
        group_name = content["data"]
        return f'{actor} changed the group name to "{group_name}"'

    if action == 11:
        group_name = content["data"]
        return f'{actor} created a group with name: "{group_name}"'

    if action in (10, 28):
        return _number_change_text(content)

    if action in (18, 57):
        # Without a concrete contact name, fall back to the generic wording.
        if actor and actor != "You":
            return f"The security code between you and {actor} changed"
        return "The security code in this chat changed"

    if action == 27:
        # A missing description must not crash the export.
        description = content["data"] or ""
        # NOTE(review): the description is embedded verbatim next to <br>
        # markup; if the caller renders strings containing <br> as trusted
        # HTML, it should be escaped first -- confirm against the template.
        return (actor + " changed the group description to:<br>"
                + description.replace("\n", "<br>"))

    # 13: someone left the group; 15: someone was promoted to admin;
    # 46/67/69: private-chat notices that are intentionally not rendered.
    # Anything else is an unsupported action type.
    return None