mirror of
https://github.com/KnugiHK/WhatsApp-Chat-Exporter.git
synced 2026-04-25 07:21:36 +00:00
Batch of changes; see description
1. Support some metadata 2. Show the sender of messages in group chats 3. Support GPS locations (Android only) 4. Support vCards
This commit is contained in:
95
extract.py
95
extract.py
@@ -8,6 +8,7 @@ import os
|
|||||||
import base64
|
import base64
|
||||||
import requests
|
import requests
|
||||||
import shutil
|
import shutil
|
||||||
|
import re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from mimetypes import MimeTypes
|
from mimetypes import MimeTypes
|
||||||
|
|
||||||
@@ -43,7 +44,8 @@ c.execute("""SELECT count() FROM messages""")
|
|||||||
total_row_number = c.fetchone()[0]
|
total_row_number = c.fetchone()[0]
|
||||||
print(f"Gathering messages...(0/{total_row_number})", end="\r")
|
print(f"Gathering messages...(0/{total_row_number})", end="\r")
|
||||||
|
|
||||||
c.execute("""SELECT key_remote_jid, _id, key_from_me, timestamp, data FROM messages; """)
|
phone_number_re = re.compile(r"[0-9]+@s.whatsapp.net")
|
||||||
|
c.execute("""SELECT key_remote_jid, _id, key_from_me, timestamp, data, status, edit_version, thumb_image, remote_resource, media_wa_type, latitude, longitude FROM messages; """)
|
||||||
i = 0
|
i = 0
|
||||||
content = c.fetchone()
|
content = c.fetchone()
|
||||||
while content is not None:
|
while content is not None:
|
||||||
@@ -53,9 +55,76 @@ while content is not None:
|
|||||||
"from_me": bool(content[2]),
|
"from_me": bool(content[2]),
|
||||||
"timestamp": content[3]/1000,
|
"timestamp": content[3]/1000,
|
||||||
"time": datetime.fromtimestamp(content[3]/1000).strftime("%H:%M"),
|
"time": datetime.fromtimestamp(content[3]/1000).strftime("%H:%M"),
|
||||||
"data": content[4],
|
|
||||||
"media": False
|
"media": False
|
||||||
}
|
}
|
||||||
|
if "-" in content[0] and content[2] == 0:
|
||||||
|
if content[8] in data:
|
||||||
|
name = data[content[8]]["name"]
|
||||||
|
else:
|
||||||
|
name = None
|
||||||
|
data[content[0]]["messages"][content[1]]["sender"] = name or content[8].split('@')[0]
|
||||||
|
else:
|
||||||
|
data[content[0]]["messages"][content[1]]["sender"] = None
|
||||||
|
|
||||||
|
if content[5] == 6:
|
||||||
|
if "-" in content[0]:
|
||||||
|
# Is Group
|
||||||
|
if content[4] is not None:
|
||||||
|
try:
|
||||||
|
int(content[4])
|
||||||
|
except:
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = "{The group name changed to "f"{content[4]}"" }"
|
||||||
|
else:
|
||||||
|
del data[content[0]]["messages"][content[1]]
|
||||||
|
else:
|
||||||
|
thumb_image = content[7]
|
||||||
|
if thumb_image is not None:
|
||||||
|
if b"\x00\x00\x01\x74\x00\x1A" in thumb_image:
|
||||||
|
# Add user
|
||||||
|
added = phone_number_re.search(thumb_image.decode("unicode_escape"))[0]
|
||||||
|
if added in data:
|
||||||
|
name_right = data[added]["name"]
|
||||||
|
else:
|
||||||
|
name_right = added.split('@')[0]
|
||||||
|
if content[8] is not None:
|
||||||
|
if content[8] in data:
|
||||||
|
name_left = data[content[8]]["name"]
|
||||||
|
else:
|
||||||
|
name_left = content[8].split('@')[0]
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = "{"f"{name_left}"f" added {name_right or 'You'}""}"
|
||||||
|
else:
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = "{"f"Added {name_right or 'You'}""}"
|
||||||
|
if b"\xac\xed\x00\x05\x74\x00" in thumb_image:
|
||||||
|
# Changed number
|
||||||
|
original = content[8].split('@')[0]
|
||||||
|
changed = thumb_image[7:].decode().split('@')[0]
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = "{"f"{original} changed to {changed}""}"
|
||||||
|
else:
|
||||||
|
if content[4] is None:
|
||||||
|
del data[content[0]]["messages"][content[1]]
|
||||||
|
else:
|
||||||
|
# Private chat
|
||||||
|
if content[4] is None and content[7] is None:
|
||||||
|
del data[content[0]]["messages"][content[1]]
|
||||||
|
|
||||||
|
else:
|
||||||
|
if content[2] == 1:
|
||||||
|
if content[5] == 5 and content[6] == 7:
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = "{Message deleted}"
|
||||||
|
else:
|
||||||
|
if content[9] == "5":
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = "{ Location shared: "f"{content[10], content[11]}"" }"
|
||||||
|
else:
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = content[4]
|
||||||
|
else:
|
||||||
|
if content[5] == 0 and content[6] == 7:
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = "{Message deleted}"
|
||||||
|
else:
|
||||||
|
if content[9] == "5":
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = "{ Location shared: "f"{content[10], content[11]}"" }"
|
||||||
|
else:
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = content[4]
|
||||||
|
|
||||||
i += 1
|
i += 1
|
||||||
if i % 1000 == 0:
|
if i % 1000 == 0:
|
||||||
print(f"Gathering messages...({i}/{total_row_number})", end="\r")
|
print(f"Gathering messages...({i}/{total_row_number})", end="\r")
|
||||||
@@ -102,6 +171,23 @@ while content is not None:
|
|||||||
content = c.fetchone()
|
content = c.fetchone()
|
||||||
print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
|
print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
|
||||||
|
|
||||||
|
c.execute("""SELECT message_row_id, messages.key_remote_jid, vcard, messages.media_name FROM messages_vcards INNER JOIN messages ON messages_vcards.message_row_id = messages._id ORDER BY messages.key_remote_jid ASC""")
|
||||||
|
rows = c.fetchall()
|
||||||
|
total_row_number = len(rows)
|
||||||
|
print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
|
||||||
|
base = "WhatsApp/vCards"
|
||||||
|
for index, row in enumerate(rows):
|
||||||
|
if not os.path.isdir(base):
|
||||||
|
os.mkdir(base)
|
||||||
|
file_name = "".join(x for x in row[3] if x.isalnum())
|
||||||
|
file_path = f"{base}/{file_name}.vcf"
|
||||||
|
if not os.path.isfile(file_path):
|
||||||
|
with open(file_path, "w", encoding="utf-8") as f:
|
||||||
|
f.write(row[2])
|
||||||
|
data[row[1]]["messages"][row[0]]["data"] = row[3] + "{ The vCard file cannot be displayed here, however it should be located at " + file_path + "}"
|
||||||
|
data[row[1]]["messages"][row[0]]["mime"] = "x-vcard"
|
||||||
|
print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r")
|
||||||
|
|
||||||
templateLoader = jinja2.FileSystemLoader(searchpath="./")
|
templateLoader = jinja2.FileSystemLoader(searchpath="./")
|
||||||
templateEnv = jinja2.Environment(loader=templateLoader)
|
templateEnv = jinja2.Environment(loader=templateLoader)
|
||||||
templateEnv.globals.update(determine_day=determine_day)
|
templateEnv.globals.update(determine_day=determine_day)
|
||||||
@@ -135,8 +221,9 @@ for current, i in enumerate(data):
|
|||||||
name = data[i]["name"]
|
name = data[i]["name"]
|
||||||
else:
|
else:
|
||||||
name = phone_number
|
name = phone_number
|
||||||
|
safe_file_name = ''
|
||||||
with open(f"{output_folder}/{file_name}.html", "w", encoding="utf-8") as f:
|
safe_file_name = "".join(x for x in file_name if x.isalnum())
|
||||||
|
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
|
||||||
f.write(template.render(name=name, msgs=data[i]["messages"].values(), my_avatar=None, their_avatar=f"WhatsApp/Avatars/{i}.j"))
|
f.write(template.render(name=name, msgs=data[i]["messages"].values(), my_avatar=None, their_avatar=f"WhatsApp/Avatars/{i}.j"))
|
||||||
if current % 10 == 0:
|
if current % 10 == 0:
|
||||||
print(f"Creating HTML...({current}/{total_row_number})", end="\r")
|
print(f"Creating HTML...({current}/{total_row_number})", end="\r")
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ total_row_number = c.fetchone()[0]
|
|||||||
apple_time = datetime.timestamp(datetime(2001,1,1))
|
apple_time = datetime.timestamp(datetime(2001,1,1))
|
||||||
print(f"Gathering messages...(0/{total_row_number})", end="\r")
|
print(f"Gathering messages...(0/{total_row_number})", end="\r")
|
||||||
|
|
||||||
c.execute("""SELECT COALESCE(ZFROMJID, ZTOJID), Z_PK, ZISFROMME, ZMESSAGEDATE, ZTEXT FROM ZWAMESSAGE;""")
|
c.execute("""SELECT COALESCE(ZFROMJID, ZTOJID), ZWAMESSAGE.Z_PK, ZISFROMME, ZMESSAGEDATE, ZTEXT, ZMESSAGETYPE, ZWAGROUPMEMBER.ZMEMBERJID FROM main.ZWAMESSAGE LEFT JOIN main.ZWAGROUPMEMBER ON main.ZWAMESSAGE.ZGROUPMEMBER = main.ZWAGROUPMEMBER.Z_PK;""")
|
||||||
i = 0
|
i = 0
|
||||||
content = c.fetchone()
|
content = c.fetchone()
|
||||||
while content is not None:
|
while content is not None:
|
||||||
@@ -52,9 +52,51 @@ while content is not None:
|
|||||||
"from_me": bool(content[2]),
|
"from_me": bool(content[2]),
|
||||||
"timestamp": ts,
|
"timestamp": ts,
|
||||||
"time": datetime.fromtimestamp(ts).strftime("%H:%M"),
|
"time": datetime.fromtimestamp(ts).strftime("%H:%M"),
|
||||||
"data": content[4],
|
|
||||||
"media": False
|
"media": False
|
||||||
}
|
}
|
||||||
|
if "-" in content[0] and content[2] == 0:
|
||||||
|
name = None
|
||||||
|
if content[6] is not None:
|
||||||
|
if content[6] in data:
|
||||||
|
name = data[content[6]]["name"]
|
||||||
|
if "@" in content[6]:
|
||||||
|
fallback = content[6].split('@')[0]
|
||||||
|
else:
|
||||||
|
fallback = None
|
||||||
|
else:
|
||||||
|
fallback = None
|
||||||
|
data[content[0]]["messages"][content[1]]["sender"] = name or fallback
|
||||||
|
else:
|
||||||
|
data[content[0]]["messages"][content[1]]["sender"] = None
|
||||||
|
if content[5] == 6:
|
||||||
|
# Metadata
|
||||||
|
if "-" in content[0]:
|
||||||
|
# Group
|
||||||
|
if content[4] is not None:
|
||||||
|
# Changed name
|
||||||
|
try:
|
||||||
|
int(content[4])
|
||||||
|
except:
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = "{The group name changed to "f"{content[4]}"" }"
|
||||||
|
else:
|
||||||
|
del data[content[0]]["messages"][content[1]]
|
||||||
|
else:
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = None
|
||||||
|
else:
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = None
|
||||||
|
else:
|
||||||
|
# real message
|
||||||
|
if content[2] == 1:
|
||||||
|
if content[5] == 14:
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = "{Message deleted}"
|
||||||
|
else:
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = content[4]
|
||||||
|
else:
|
||||||
|
if content[5] == 14:
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = "{Message deleted}"
|
||||||
|
else:
|
||||||
|
data[content[0]]["messages"][content[1]]["data"] = content[4]
|
||||||
|
|
||||||
i += 1
|
i += 1
|
||||||
if i % 1000 == 0:
|
if i % 1000 == 0:
|
||||||
print(f"Gathering messages...({i}/{total_row_number})", end="\r")
|
print(f"Gathering messages...({i}/{total_row_number})", end="\r")
|
||||||
@@ -100,6 +142,24 @@ while content is not None:
|
|||||||
content = c.fetchone()
|
content = c.fetchone()
|
||||||
print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
|
print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
|
||||||
|
|
||||||
|
c.execute("""SELECT DISTINCT ZWAVCARDMENTION.ZMEDIAITEM, ZWAMEDIAITEM.ZMESSAGE, COALESCE(ZWAMESSAGE.ZFROMJID, ZWAMESSAGE.ZTOJID) as _id, ZVCARDNAME, ZVCARDSTRING FROM ZWAVCARDMENTION INNER JOIN ZWAMEDIAITEM ON ZWAVCARDMENTION.ZMEDIAITEM = ZWAMEDIAITEM.Z_PK INNER JOIN ZWAMESSAGE ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK""")
|
||||||
|
rows = c.fetchall()
|
||||||
|
total_row_number = len(rows)
|
||||||
|
print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
|
||||||
|
base = "Message/vCards"
|
||||||
|
for index, row in enumerate(rows):
|
||||||
|
if not os.path.isdir(base):
|
||||||
|
os.mkdir(base)
|
||||||
|
file_name = "".join(x for x in row[3] if x.isalnum())
|
||||||
|
file_path = f"{base}/{file_name[:200]}.vcf"
|
||||||
|
if not os.path.isfile(file_path):
|
||||||
|
with open(file_path, "w", encoding="utf-8") as f:
|
||||||
|
f.write(row[4])
|
||||||
|
data[row[2]]["messages"][row[1]]["data"] = row[3] + "{ The vCard file cannot be displayed here, however it should be located at " + file_path + "}"
|
||||||
|
data[row[2]]["messages"][row[1]]["mime"] = "x-vcard"
|
||||||
|
data[row[2]]["messages"][row[1]]["media"] = True
|
||||||
|
print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r")
|
||||||
|
|
||||||
templateLoader = jinja2.FileSystemLoader(searchpath="./")
|
templateLoader = jinja2.FileSystemLoader(searchpath="./")
|
||||||
templateEnv = jinja2.Environment(loader=templateLoader)
|
templateEnv = jinja2.Environment(loader=templateLoader)
|
||||||
templateEnv.globals.update(determine_day=determine_day)
|
templateEnv.globals.update(determine_day=determine_day)
|
||||||
@@ -133,8 +193,10 @@ for current, i in enumerate(data):
|
|||||||
name = data[i]["name"]
|
name = data[i]["name"]
|
||||||
else:
|
else:
|
||||||
name = phone_number
|
name = phone_number
|
||||||
|
|
||||||
with open(f"{output_folder}/{file_name}.html", "w", encoding="utf-8") as f:
|
safe_file_name = ''
|
||||||
|
safe_file_name = "".join(x for x in file_name if x.isalnum())
|
||||||
|
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
|
||||||
f.write(template.render(name=name, msgs=data[i]["messages"].values(), my_avatar=None, their_avatar=f"WhatsApp/Avatars/{i}.j"))
|
f.write(template.render(name=name, msgs=data[i]["messages"].values(), my_avatar=None, their_avatar=f"WhatsApp/Avatars/{i}.j"))
|
||||||
if current % 10 == 0:
|
if current % 10 == 0:
|
||||||
print(f"Creating HTML...({current}/{total_row_number})", end="\r")
|
print(f"Creating HTML...({current}/{total_row_number})", end="\r")
|
||||||
|
|||||||
@@ -30,7 +30,6 @@
|
|||||||
}
|
}
|
||||||
img, video {
|
img, video {
|
||||||
max-width:100%;
|
max-width:100%;
|
||||||
|
|
||||||
width: 70%;
|
width: 70%;
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
@@ -55,7 +54,7 @@
|
|||||||
<div class="w3-col m10 l10">
|
<div class="w3-col m10 l10">
|
||||||
<div style="text-align: right;">
|
<div style="text-align: right;">
|
||||||
{% if msg.media == false %}
|
{% if msg.media == false %}
|
||||||
{% filter escape %}{{ msg.data }}{% endfilter %}
|
{% filter escape %}{{ msg.data or "{This message is not supported yet}" }}{% endfilter %}
|
||||||
{% else %}
|
{% else %}
|
||||||
{% if "image/" in msg.mime %}
|
{% if "image/" in msg.mime %}
|
||||||
<img src="{{ msg.data }}" />
|
<img src="{{ msg.data }}" />
|
||||||
@@ -79,7 +78,13 @@
|
|||||||
</div>
|
</div>
|
||||||
{% else %}
|
{% else %}
|
||||||
<div class="w3-row">
|
<div class="w3-row">
|
||||||
<div style="padding-right: 10px; float: left; color: #3892da;">{{ name }}</div>
|
<div style="padding-right: 10px; float: left; color: #3892da;">
|
||||||
|
{% if msg.sender is not none %}
|
||||||
|
{{ msg.sender }}
|
||||||
|
{% else %}
|
||||||
|
{{ name }}
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
<div style="text-align: right; color:#70777c;">{{ msg.time }}</div>
|
<div style="text-align: right; color:#70777c;">{{ msg.time }}</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="w3-row">
|
<div class="w3-row">
|
||||||
@@ -87,7 +92,7 @@
|
|||||||
<div class="w3-col m10 l10">
|
<div class="w3-col m10 l10">
|
||||||
<div style="text-align: left;">
|
<div style="text-align: left;">
|
||||||
{% if msg.media == false %}
|
{% if msg.media == false %}
|
||||||
{% filter escape %}{{ msg.data }}{% endfilter %}
|
{% filter escape %}{{ msg.data or "{This message is not supported yet}" }}{% endfilter %}
|
||||||
{% else %}
|
{% else %}
|
||||||
{% if "image/" in msg.mime %}
|
{% if "image/" in msg.mime %}
|
||||||
<img src="{% filter escape %}{{ msg.data }}{% endfilter %}" />
|
<img src="{% filter escape %}{{ msg.data }}{% endfilter %}" />
|
||||||
|
|||||||
Reference in New Issue
Block a user