This commit is contained in:
KnugiHK
2025-05-11 18:07:51 +08:00
parent cc410b8503
commit 33149075d3
14 changed files with 372 additions and 311 deletions

View File

@@ -14,12 +14,12 @@ logger = logging.getLogger(__name__)
def messages(path, data, assume_first_as_me=False):
"""
Extracts messages from an exported WhatsApp chat file.
Args:
path: Path to the exported chat file
data: Data container object to store the parsed chat
assume_first_as_me: If True, assumes the first message is sent from the user without asking
Returns:
Updated data container with extracted messages
"""
@@ -27,16 +27,16 @@ def messages(path, data, assume_first_as_me=False):
chat = data.add_chat("ExportedChat", ChatStore(Device.EXPORTED))
you = "" # Will store the username of the current user
user_identification_done = False # Flag to track if user identification has been done
# First pass: count total lines for progress reporting
with open(path, "r", encoding="utf8") as file:
total_row_number = sum(1 for _ in file)
# Second pass: process the messages
with open(path, "r", encoding="utf8") as file:
for index, line in enumerate(file):
you, user_identification_done = process_line(
line, index, chat, path, you,
line, index, chat, path, you,
assume_first_as_me, user_identification_done
)
@@ -51,31 +51,31 @@ def messages(path, data, assume_first_as_me=False):
def process_line(line, index, chat, file_path, you, assume_first_as_me, user_identification_done):
"""
Process a single line from the chat file
Returns:
Tuple of (updated_you_value, updated_user_identification_done_flag)
"""
parts = line.split(" - ", 1)
# Check if this is a new message (has timestamp format)
if len(parts) > 1:
time = parts[0]
you, user_identification_done = process_new_message(
time, parts[1], index, chat, you, file_path,
time, parts[1], index, chat, you, file_path,
assume_first_as_me, user_identification_done
)
else:
# This is a continuation of the previous message
process_message_continuation(line, index, chat)
return you, user_identification_done
def process_new_message(time, content, index, chat, you, file_path,
def process_new_message(time, content, index, chat, you, file_path,
assume_first_as_me, user_identification_done):
"""
Process a line that contains a new message
Returns:
Tuple of (updated_you_value, updated_user_identification_done_flag)
"""
@@ -88,7 +88,7 @@ def process_new_message(time, content, index, chat, you, file_path,
received_timestamp=None,
read_timestamp=None
)
# Check if this is a system message (no name:message format)
if ":" not in content:
msg.data = content
@@ -96,7 +96,7 @@ def process_new_message(time, content, index, chat, you, file_path,
else:
# Process user message
name, message = content.strip().split(":", 1)
# Handle user identification
if you == "":
if chat.name is None:
@@ -113,17 +113,17 @@ def process_new_message(time, content, index, chat, you, file_path,
# If we know the chat name, anyone else must be "you"
if name != chat.name:
you = name
# Set the chat name if needed
if chat.name is None and name != you:
chat.name = name
# Determine if this message is from the current user
msg.from_me = (name == you)
# Process message content
process_message_content(msg, message, file_path)
chat.add_message(index, msg)
return you, user_identification_done
@@ -144,11 +144,11 @@ def process_attached_file(msg, message, file_path):
"""Process an attached file in a message"""
mime = MimeTypes()
msg.media = True
# Extract file path and check if it exists
file_name = message.split("(file attached)")[0].strip()
attached_file_path = os.path.join(os.path.dirname(file_path), file_name)
if os.path.isfile(attached_file_path):
msg.data = attached_file_path
guess = mime.guess_type(attached_file_path)[0]
@@ -165,9 +165,9 @@ def process_message_continuation(line, index, chat):
lookback = index - 1
while lookback not in chat.keys():
lookback -= 1
msg = chat.get_message(lookback)
# Add the continuation line to the message
if msg.media:
msg.caption = line.strip()
@@ -182,4 +182,4 @@ def prompt_for_user_identification(name):
if ans == "y":
return name
elif ans == "n":
return ""
return ""