From 3ed269e17f05c4d39786b2819c4275c922c8753b Mon Sep 17 00:00:00 2001
From: KnugiHK <24708955+KnugiHK@users.noreply.github.com>
Date: Tue, 20 Jun 2023 19:12:38 +0800
Subject: [PATCH] Support a lot of metadata in Android's new schema
---
Whatsapp_Chat_Exporter/extract.py | 60 ++++++++++++++++++-----
Whatsapp_Chat_Exporter/utility.py | 81 +++++++++++++++++++++++++++++++
2 files changed, 128 insertions(+), 13 deletions(-)
diff --git a/Whatsapp_Chat_Exporter/extract.py b/Whatsapp_Chat_Exporter/extract.py
index 47efd50..1b72dc7 100644
--- a/Whatsapp_Chat_Exporter/extract.py
+++ b/Whatsapp_Chat_Exporter/extract.py
@@ -13,7 +13,8 @@ from mimetypes import MimeTypes
from hashlib import sha256
from base64 import b64decode, b64encode
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
-from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, Device, rendering, sanitize_except, determine_day, Crypt
+from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, Device, determine_metadata
+from Whatsapp_Chat_Exporter.utility import rendering, sanitize_except, determine_day, Crypt
from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS
try:
@@ -225,7 +226,11 @@ def messages(db, data, media_folder):
jid_group.raw_string as group_sender_jid,
chat.subject as chat_subject,
missed_call_logs.video_call,
- message.sender_jid_row_id
+ message.sender_jid_row_id,
+ message_system.action_type,
+ message_system_group.is_me_joined,
+ jid_old.raw_string as old_jid,
+ jid_new.raw_string as new_jid
FROM message
LEFT JOIN message_quoted
ON message_quoted.message_row_id = message._id
@@ -245,6 +250,16 @@ def messages(db, data, media_folder):
ON jid_group._id = message.sender_jid_row_id
LEFT JOIN missed_call_logs
ON message._id = missed_call_logs.message_row_id
+ LEFT JOIN message_system
+ ON message_system.message_row_id = message._id
+ LEFT JOIN message_system_group
+ ON message_system_group.message_row_id = message._id
+ LEFT JOIN message_system_number_change
+ ON message_system_number_change.message_row_id = message._id
+ LEFT JOIN jid jid_old
+ ON jid_old._id = message_system_number_change.old_jid_row_id
+ LEFT JOIN jid jid_new
+ ON jid_new._id = message_system_number_change.new_jid_row_id
WHERE key_remote_jid <> '-1';"""
)
except Exception as e:
@@ -266,8 +281,12 @@ def messages(db, data, media_folder):
data[content["key_remote_jid"]] = ChatStore(Device.ANDROID, content["chat_subject"])
if content["key_remote_jid"] is None:
continue # Not sure
+ if "sender_jid_row_id" in content:
+ sender_jid_row_id = content["sender_jid_row_id"]
+ else:
+ sender_jid_row_id = None
message = Message(
- from_me=content["key_from_me"],
+ from_me=not sender_jid_row_id and content["key_from_me"],
timestamp=content["timestamp"],
time=content["timestamp"],
key_id=content["key_id"],
@@ -283,7 +302,6 @@ def messages(db, data, media_folder):
i += 1
content = c.fetchone()
continue
- invalid = False
if "-" in content["key_remote_jid"] and content["key_from_me"] == 0:
name = fallback = None
if table_message:
@@ -320,18 +338,18 @@ def messages(db, data, media_folder):
message.caption = None
if content["status"] == 6: # 6 = Metadata, otherwise assume a message
- if (not table_message and "-" in content["key_remote_jid"]) or \
- (table_message and content["chat_subject"] is not None):
+ if not table_message and "-" in content["key_remote_jid"]:
# Is Group
if content["data"] is not None and content["data"] != "":
try:
int(content["data"])
except ValueError:
- msg = f"The group name changed to {content['data']}"
+ msg = f'''The group name changed to "{content['data']}"'''
message.data = msg
message.meta = True
else:
- invalid = True
+ message.meta = True
+ message.data = None
else:
thumb_image = content["thumb_image"] # Not applicable for new schema
if thumb_image is not None:
@@ -344,7 +362,7 @@ def messages(db, data, media_folder):
else:
name_right = added.split('@')[0]
if content["remote_resource"] is not None:
- if content["remote_resource"] in data:
+ if content["remote_resource"] in data and data[content["remote_resource"]].name is not None:
name_left = data[content["remote_resource"]].name
else:
name_left = content["remote_resource"].split('@')[0]
@@ -360,7 +378,23 @@ def messages(db, data, media_folder):
message.meta = True
else:
if content["data"] is None:
- invalid = True
+ message.meta = True
+ message.data = None
+
+ elif table_message:
+ message.meta = True
+ name = fallback = None
+ if content["sender_jid_row_id"] > 0:
+ _jid = content["group_sender_jid"]
+ if _jid in data:
+ name = data[_jid].name
+ if "@" in _jid:
+ fallback = _jid.split('@')[0]
+ else:
+ name = "You"
+ message.data = determine_metadata(content, name or fallback)
+                    if isinstance(message.data, str) and "<br>" in message.data:
+ message.safe = True
else:
# Private chat
if content["video_call"] is not None: # Missed call
@@ -370,7 +404,8 @@ def messages(db, data, media_folder):
elif content["video_call"] == 0:
message.data = "A voice call was missed"
elif content["data"] is None and content["thumb_image"] is None:
- invalid = True # Unhandle metadata
+ message.meta = True
+ message.data = None
else:
# Real message
if content["media_wa_type"] == 20: # Sticker is a message
@@ -407,8 +442,7 @@ def messages(db, data, media_folder):
msg = msg.replace("\n", "<br>")
message.data = msg
- if not invalid:
- data[content["key_remote_jid"]].add_message(content["_id"], message)
+ data[content["key_remote_jid"]].add_message(content["_id"], message)
i += 1
if i % 1000 == 0:
print(f"Processing messages...({i}/{total_row_number})", end="\r")
diff --git a/Whatsapp_Chat_Exporter/utility.py b/Whatsapp_Chat_Exporter/utility.py
index f738c5e..49f993f 100644
--- a/Whatsapp_Chat_Exporter/utility.py
+++ b/Whatsapp_Chat_Exporter/utility.py
@@ -152,5 +152,86 @@ def brute_force_offset(max_iv=200, max_db=200):
yield iv, iv + 16, db
+def determine_metadata(content, init_msg):
+    """Build the human-readable text for an Android system (metadata) message.
+
+    A row with is_me_joined == 1 always yields the "You were added" text;
+    otherwise the text is selected by the row's action_type.
+
+    Args:
+        content: Row-like mapping; the keys read are "is_me_joined",
+            "action_type", "data", "old_jid" and "new_jid".
+        init_msg: Name of the acting party ("You" is special-cased for the
+            security-code messages); a falsy value becomes an empty string.
+
+    Returns:
+        The description as a str (it may contain "<br>" markup), or None
+        when the action type is unsupported or deliberately not rendered.
+    """
+    msg = init_msg if init_msg else ""
+    if content["is_me_joined"] == 1:  # Override
+        return f"You were added into the group by {msg}"
+    action = content["action_type"]
+    if action == 1:
+        msg += f''' changed the group name to "{content['data']}"'''
+    elif action == 4:
+        msg += " was added to the group"
+    elif action == 5:
+        msg += " left the group"
+    elif action == 6:
+        msg += " changed the group icon"
+    elif action == 7:
+        msg = "You were removed"
+    elif action == 8:
+        # Assigned rather than appended so no name gets glued to the front.
+        msg = ("WhatsApp Internal Error Occurred: "
+               "you cannot send message to this group")
+    elif action == 9:
+        msg += " created a broadcast channel"
+    elif action in (10, 28):
+        # A missing JID (None) cannot be split; such rows are not rendered.
+        try:
+            old = content['old_jid'].split('@')[0]
+            new = content['new_jid'].split('@')[0]
+        except (AttributeError, IndexError):
+            return None
+        msg = f"{old} changed their number to {new}"
+    elif action == 11:
+        msg += f''' created a group with name: "{content['data']}"'''
+    elif action == 12:
+        msg += " added someone"  # TODO: Find out who
+    elif action == 14:
+        msg += " removed someone"  # TODO: Find out who
+    elif action in (18, 57):
+        if msg not in ("", "You"):  # only when a contact name is known
+            msg = f"The security code between you and {msg} changed"
+        else:
+            msg = "The security code in this chat changed"
+    elif action == 19:
+        msg = "This chat is now end-to-end encrypted"
+    elif action == 20:
+        msg = "Someone joined this group by using an invite link"  # TODO: Find out who
+    elif action == 27:
+        msg += " changed the group description to:<br>"
+        # "data" may be None; render an empty description instead of crashing.
+        msg += (content['data'] or "").replace("\n", '<br>')
+    elif action == 47:
+        msg = "The contact is an official business account"
+    elif action == 50:
+        msg = "The contact's account type changed from business to standard"
+    elif action == 56:
+        msg = "Message timer was enabled/updated/disabled"
+    elif action == 58:
+        msg = "You blocked this contact"
+    elif action in (13, 15, 46, 67, 69):
+        # Known but deliberately not rendered: 13 someone left the group,
+        # 15 admin promotion, 46 voice message in PM (?), 67/69 the contact
+        # uses a secure service from Facebook (?).
+        return None
+    else:
+        return None  # Unsupported
+    return msg
+
+
# iOS Specific
APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1))