Support a lot of metadata in Android's new schema

This commit is contained in:
KnugiHK
2023-06-20 19:12:38 +08:00
parent 1e3ee5e322
commit 3ed269e17f
2 changed files with 128 additions and 13 deletions

View File

@@ -13,7 +13,8 @@ from mimetypes import MimeTypes
from hashlib import sha256
from base64 import b64decode, b64encode
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, Device, rendering, sanitize_except, determine_day, Crypt
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, Device, determine_metadata
from Whatsapp_Chat_Exporter.utility import rendering, sanitize_except, determine_day, Crypt
from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS
try:
@@ -225,7 +226,11 @@ def messages(db, data, media_folder):
jid_group.raw_string as group_sender_jid,
chat.subject as chat_subject,
missed_call_logs.video_call,
message.sender_jid_row_id
message.sender_jid_row_id,
message_system.action_type,
message_system_group.is_me_joined,
jid_old.raw_string as old_jid,
jid_new.raw_string as new_jid
FROM message
LEFT JOIN message_quoted
ON message_quoted.message_row_id = message._id
@@ -245,6 +250,16 @@ def messages(db, data, media_folder):
ON jid_group._id = message.sender_jid_row_id
LEFT JOIN missed_call_logs
ON message._id = missed_call_logs.message_row_id
LEFT JOIN message_system
ON message_system.message_row_id = message._id
LEFT JOIN message_system_group
ON message_system_group.message_row_id = message._id
LEFT JOIN message_system_number_change
ON message_system_number_change.message_row_id = message._id
LEFT JOIN jid jid_old
ON jid_old._id = message_system_number_change.old_jid_row_id
LEFT JOIN jid jid_new
ON jid_new._id = message_system_number_change.new_jid_row_id
WHERE key_remote_jid <> '-1';"""
)
except Exception as e:
@@ -266,8 +281,12 @@ def messages(db, data, media_folder):
data[content["key_remote_jid"]] = ChatStore(Device.ANDROID, content["chat_subject"])
if content["key_remote_jid"] is None:
continue # Not sure
if "sender_jid_row_id" in content:
sender_jid_row_id = content["sender_jid_row_id"]
else:
sender_jid_row_id = None
message = Message(
from_me=content["key_from_me"],
from_me=not sender_jid_row_id and content["key_from_me"],
timestamp=content["timestamp"],
time=content["timestamp"],
key_id=content["key_id"],
@@ -283,7 +302,6 @@ def messages(db, data, media_folder):
i += 1
content = c.fetchone()
continue
invalid = False
if "-" in content["key_remote_jid"] and content["key_from_me"] == 0:
name = fallback = None
if table_message:
@@ -320,18 +338,18 @@ def messages(db, data, media_folder):
message.caption = None
if content["status"] == 6: # 6 = Metadata, otherwise assume a message
if (not table_message and "-" in content["key_remote_jid"]) or \
(table_message and content["chat_subject"] is not None):
if not table_message and "-" in content["key_remote_jid"]:
# Is Group
if content["data"] is not None and content["data"] != "":
try:
int(content["data"])
except ValueError:
msg = f"The group name changed to {content['data']}"
msg = f'''The group name changed to "{content['data']}"'''
message.data = msg
message.meta = True
else:
invalid = True
message.meta = True
message.data = None
else:
thumb_image = content["thumb_image"] # Not applicable for new schema
if thumb_image is not None:
@@ -344,7 +362,7 @@ def messages(db, data, media_folder):
else:
name_right = added.split('@')[0]
if content["remote_resource"] is not None:
if content["remote_resource"] in data:
if content["remote_resource"] in data and data[content["remote_resource"]].name is not None:
name_left = data[content["remote_resource"]].name
else:
name_left = content["remote_resource"].split('@')[0]
@@ -360,7 +378,23 @@ def messages(db, data, media_folder):
message.meta = True
else:
if content["data"] is None:
invalid = True
message.meta = True
message.data = None
elif table_message:
message.meta = True
name = fallback = None
if content["sender_jid_row_id"] > 0:
_jid = content["group_sender_jid"]
if _jid in data:
name = data[_jid].name
if "@" in _jid:
fallback = _jid.split('@')[0]
else:
name = "You"
message.data = determine_metadata(content, name or fallback)
if isinstance(message.data, str) and "<br>" in message.data:
message.safe = True
else:
# Private chat
if content["video_call"] is not None: # Missed call
@@ -370,7 +404,8 @@ def messages(db, data, media_folder):
elif content["video_call"] == 0:
message.data = "A voice call was missed"
elif content["data"] is None and content["thumb_image"] is None:
invalid = True # Unhandle metadata
message.meta = True
message.data = None
else:
# Real message
if content["media_wa_type"] == 20: # Sticker is a message
@@ -407,8 +442,7 @@ def messages(db, data, media_folder):
msg = msg.replace("\n", "<br>")
message.data = msg
if not invalid:
data[content["key_remote_jid"]].add_message(content["_id"], message)
data[content["key_remote_jid"]].add_message(content["_id"], message)
i += 1
if i % 1000 == 0:
print(f"Processing messages...({i}/{total_row_number})", end="\r")

View File

@@ -152,5 +152,86 @@ def brute_force_offset(max_iv=200, max_db=200):
yield iv, iv + 16, db
# Actions rendered as "<actor><suffix>".
_ACTOR_SUFFIX_ACTIONS = {
    4: " was added to the group",
    5: " left the group",
    6: " changed the group icon",
    9: " created a broadcast channel",
    12: " added someone",  # TODO: Find out who
    14: " removed someone",  # TODO: Find out who
}

# Actions rendered as a fixed sentence, regardless of who triggered them.
_FIXED_TEXT_ACTIONS = {
    7: "You were removed",
    8: ("WhatsApp Internal Error Occurred: "
        "you cannot send message to this group"),
    19: "This chat is now end-to-end encrypted",
    20: "Someone joined this group by using an invite link",  # TODO: Find out who
    47: "The contact is an official business account",
    50: "The contact's account type changed from business to standard",
    56: "Message timer was enabled/updated/disabled",
    58: "You blocked this contact",
}

# Action types that are recognised but intentionally produce no text
# (they fall through to the final ``return None`` like unsupported ones):
#   13 - someone left the group
#   15 - someone promoted someone as an admin
#   46 - voice message in PM??? Seems no need to handle.
#   67 - (PM) this contact use secure service from Facebook???
#   69 - same as 67??? The difference is unknown.


def determine_metadata(content, init_msg):
    """Build the human-readable text for a system (metadata) message row.

    Args:
        content: Mapping-like row supporting ``content[key]``. Reads
            ``is_me_joined`` and ``action_type`` always, and ``data``,
            ``old_jid`` / ``new_jid`` for the action types that need them.
        init_msg: Display name of whoever triggered the action (``"You"``
            for the device owner). ``None`` or ``""`` means unknown.

    Returns:
        The message text, or ``None`` when the action type is unsupported,
        deliberately ignored, or (for number changes) a JID is missing.
        The group-description text (action 27) contains ``<br>`` markup, so
        the actor name and description are HTML-escaped in that case only.
    """
    # Local import: the module's import block is not part of this block.
    from html import escape

    actor = init_msg if init_msg else ""
    if content["is_me_joined"] == 1:  # Override every other action type
        return f"You were added into the group by {actor}"

    action = content["action_type"]
    if action in _FIXED_TEXT_ACTIONS:
        return _FIXED_TEXT_ACTIONS[action]
    if action in _ACTOR_SUFFIX_ACTIONS:
        return actor + _ACTOR_SUFFIX_ACTIONS[action]

    if action == 1:
        data = content["data"]
        return f'{actor} changed the group name to "{data}"'
    if action == 11:
        data = content["data"]
        return f'{actor} created a group with name: "{data}"'
    if action in (10, 28):  # Number change
        try:
            old = content["old_jid"].split('@')[0]
            new = content["new_jid"].split('@')[0]
        except (AttributeError, IndexError):
            # A JID is NULL (no matching number-change row): nothing to show.
            return None
        return f"{old} changed their number to {new}"
    if action in (18, 57):
        # An unknown actor reads better as the generic sentence.
        if actor and actor != "You":
            return f"The security code between you and {actor} changed"
        return "The security code in this chat changed"
    if action == 27:
        # The result carries <br> markup, so every piece of database text
        # must be escaped; a NULL description is treated as empty.
        description = escape(content["data"] or "", quote=False)
        return (
            escape(actor, quote=False)
            + " changed the group description to:<br>"
            + description.replace("\n", "<br>")
        )
    return None  # Unsupported or intentionally ignored action type
# iOS Specific
# POSIX timestamp of 2001-01-01 00:00:00, computed once at import time.
# NOTE(review): datetime(2001, 1, 1) is naive, so .timestamp() interprets it
# in the machine's local timezone; the value therefore differs between hosts.
# Presumably this is the offset added to iOS timestamps - confirm whether a
# UTC-anchored value is what the callers expect.
APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1))