From 2ff0192ff9455aeea108e86b25bd37d986e2dc8f Mon Sep 17 00:00:00 2001
From: KnugiHK <24708955+KnugiHK@users.noreply.github.com>
Date: Thu, 14 Jan 2021 20:40:17 +0800
Subject: [PATCH] Support media captions and fix reply bug on iPhone
---
extract.py | 9 +++++++--
extract_iphone.py | 11 ++++++++---
whatsapp.html | 25 ++++++++++++++++---------
3 files changed, 31 insertions(+), 14 deletions(-)
diff --git a/extract.py b/extract.py
index 34fbb43..186fb38 100644
--- a/extract.py
+++ b/extract.py
@@ -45,7 +45,7 @@ total_row_number = c.fetchone()[0]
print(f"Gathering messages...(0/{total_row_number})", end="\r")
phone_number_re = re.compile(r"[0-9]+@s.whatsapp.net")
-c.execute("""SELECT messages.key_remote_jid, messages._id, messages.key_from_me, messages.timestamp, messages.data, messages.status, messages.edit_version, messages.thumb_image, messages.remote_resource, messages.media_wa_type, messages.latitude, messages.longitude, messages_quotes.key_id as quoted, messages.key_id, messages_quotes.data FROM messages LEFT JOIN messages_quotes ON messages.quoted_row_id = messages_quotes._id; """)
+c.execute("""SELECT messages.key_remote_jid, messages._id, messages.key_from_me, messages.timestamp, messages.data, messages.status, messages.edit_version, messages.thumb_image, messages.remote_resource, messages.media_wa_type, messages.latitude, messages.longitude, messages_quotes.key_id as quoted, messages.key_id, messages_quotes.data, messages.media_caption FROM messages LEFT JOIN messages_quotes ON messages.quoted_row_id = messages_quotes._id; """)
i = 0
content = c.fetchone()
while content is not None:
@@ -73,6 +73,11 @@ while content is not None:
else:
data[content[0]]["messages"][content[1]]["reply"] = None
+ if content[15] is not None:
+ data[content[0]]["messages"][content[1]]["caption"] = content[15]
+ else:
+ data[content[0]]["messages"][content[1]]["caption"] = None
+
if content[5] == 6:
if "-" in content[0]:
# Is Group
@@ -229,7 +234,7 @@ for current, i in enumerate(data):
else:
name = phone_number
safe_file_name = ''
- safe_file_name = "".join(x for x in file_name if x.isalnum())
+ safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
f.write(template.render(name=name, msgs=data[i]["messages"].values(), my_avatar=None, their_avatar=f"WhatsApp/Avatars/{i}.j"))
if current % 10 == 0:
diff --git a/extract_iphone.py b/extract_iphone.py
index 8c3fc08..5476a7d 100644
--- a/extract_iphone.py
+++ b/extract_iphone.py
@@ -52,7 +52,9 @@ while content is not None:
"from_me": bool(content[2]),
"timestamp": ts,
"time": datetime.fromtimestamp(ts).strftime("%H:%M"),
- "media": False
+ "media": False,
+ "reply": None,
+ "caption": None
}
if "-" in content[0] and content[2] == 0:
name = None
@@ -108,12 +110,13 @@ c.execute("""SELECT count() FROM ZWAMEDIAITEM""")
total_row_number = c.fetchone()[0]
print(f"\nGathering media...(0/{total_row_number})", end="\r")
i = 0
-c.execute("""SELECT COALESCE(ZWAMESSAGE.ZFROMJID, ZWAMESSAGE.ZTOJID) as _id, ZMESSAGE, ZMEDIALOCALPATH, ZMEDIAURL, ZVCARDSTRING, ZMEDIAKEY FROM ZWAMEDIAITEM INNER JOIN ZWAMESSAGE ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK WHERE ZMEDIALOCALPATH IS NOT NULL ORDER BY _id ASC""")
+c.execute("""SELECT COALESCE(ZWAMESSAGE.ZFROMJID, ZWAMESSAGE.ZTOJID) as _id, ZMESSAGE, ZMEDIALOCALPATH, ZMEDIAURL, ZVCARDSTRING, ZMEDIAKEY, ZTITLE FROM ZWAMEDIAITEM INNER JOIN ZWAMESSAGE ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK WHERE ZMEDIALOCALPATH IS NOT NULL ORDER BY _id ASC""")
content = c.fetchone()
mime = MimeTypes()
while content is not None:
file_path = f"Message/{content[2]}"
data[content[0]]["messages"][content[1]]["media"] = True
+
if os.path.isfile(file_path):
data[content[0]]["messages"][content[1]]["data"] = file_path
if content[4] is None:
@@ -136,6 +139,8 @@ while content is not None:
# else:
data[content[0]]["messages"][content[1]]["data"] = "{The media is missing}"
data[content[0]]["messages"][content[1]]["mime"] = "media"
+ if content[6] is not None:
+ data[content[0]]["messages"][content[1]]["caption"] = content[6]
i += 1
if i % 100 == 0:
print(f"Gathering media...({i}/{total_row_number})", end="\r")
@@ -195,7 +200,7 @@ for current, i in enumerate(data):
name = phone_number
safe_file_name = ''
- safe_file_name = "".join(x for x in file_name if x.isalnum())
+ safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
f.write(template.render(name=name, msgs=data[i]["messages"].values(), my_avatar=None, their_avatar=f"WhatsApp/Avatars/{i}.j"))
if current % 10 == 0:
diff --git a/whatsapp.html b/whatsapp.html
index 4c208c9..7d99597 100644
--- a/whatsapp.html
+++ b/whatsapp.html
@@ -33,7 +33,6 @@
}
img, video {
max-width:100%;
- width: 70%;
}
a.anchor {
display: block;
@@ -77,20 +76,24 @@
{% filter escape %}{{ msg.data or "{This message is not supported yet}" | replace('\n', ' ') }}{% endfilter %}
{% else %}
{% if "image/" in msg.mime %}
-
+
{% elif "audio/" in msg.mime %}
{% elif "video/" in msg.mime %}
{% elif "/" in msg.mime %}
- {The file cannot be displayed here, however it should be located at {% filter escape %}{{ msg.data }}{% endfilter %}}
+ {The file cannot be displayed here, however it should be located at {# keep the path escaped, as before this patch #}{% filter escape %}{{ msg.data }}{% endfilter %}}
{% else %}
{% filter escape %}{{ msg.data }}{% endfilter %}
{% endif %}
+ {% if msg.caption is not none %}
+
+ {# caption is sender-supplied text: escape it like every other message field #}{% filter escape %}{{ msg.caption }}{% endfilter %}
+ {% endif %}
{% endif %}
@@ -121,20 +124,24 @@
{% filter escape %}{{ msg.data or "{This message is not supported yet}" }}{% endfilter %}
{% else %}
{% if "image/" in msg.mime %}
-
+
{% elif "audio/" in msg.mime %}
{% elif "video/" in msg.mime %}
{% elif "/" in msg.mime %}
- {The file cannot be displayed here, however it should be located at {% filter escape %}{{ msg.data }}{% endfilter %}}
+ {The file cannot be displayed here, however it should be located at {# keep the path escaped, as before this patch #}{% filter escape %}{{ msg.data }}{% endfilter %}}
{% else %}
{% filter escape %}{{ msg.data }}{% endfilter %}
{% endif %}
+ {% if msg.caption is not none %}
+
+ {# caption is sender-supplied text: escape it like every other message field #}{% filter escape %}{{ msg.caption }}{% endfilter %}
+ {% endif %}
{% endif %}