mirror of
https://github.com/KnugiHK/WhatsApp-Chat-Exporter.git
synced 2026-04-11 05:46:11 +00:00
Prepare for publishing in PyPi
This commit is contained in:
1
Whatsapp_Chat_Exporter/__init__.py
Normal file
1
Whatsapp_Chat_Exporter/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
__version__ = "0.5"
|
||||
357
Whatsapp_Chat_Exporter/extract.py
Normal file
357
Whatsapp_Chat_Exporter/extract.py
Normal file
@@ -0,0 +1,357 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import sqlite3
|
||||
import json
|
||||
import jinja2
|
||||
import os
|
||||
import requests
|
||||
import shutil
|
||||
import re
|
||||
import pkgutil
|
||||
from datetime import datetime
|
||||
from mimetypes import MimeTypes
|
||||
|
||||
|
||||
def determine_day(last, current):
    """Tell whether ``current`` falls on a different calendar day than ``last``.

    Both arguments are Unix timestamps in seconds; they are converted with
    ``datetime.fromtimestamp`` and therefore compared as local dates.

    Returns:
        The ``date`` of ``current`` when the two days differ, otherwise
        ``None``.
    """
    previous_day, this_day = (
        datetime.fromtimestamp(stamp).date() for stamp in (last, current)
    )
    return this_day if this_day != previous_day else None
|
||||
|
||||
|
||||
def contacts(db, data):
    """Copy every row of ``wa_contacts`` into ``data``.

    Args:
        db: Open ``sqlite3`` connection to the contact database.
        data: Mapping updated in place; each JID is (re)bound to
            ``{"name": display_name, "messages": {}}``.
    """
    cursor = db.cursor()
    cursor.execute("""SELECT count() FROM wa_contacts""")
    (contact_count,) = cursor.fetchone()
    print(f"Gathering contacts...({contact_count})")

    cursor.execute("""SELECT jid, display_name FROM wa_contacts; """)
    for jid, display_name in cursor:
        data[jid] = {"name": display_name, "messages": {}}
|
||||
|
||||
|
||||
# Byte sequences looked for inside ``thumb_image`` of group metadata rows.
_ADDED_MARKER = b"\x00\x00\x01\x74\x00\x1A"
_NUMBER_CHANGED_MARKER = b"\xac\xed\x00\x05\x74\x00"
# Searched directly in the raw blob so no lossy/erroring text decode is needed.
_JID_RE = re.compile(rb"[0-9]+@s\.whatsapp\.net")


def _group_event_text(data, thumb_image, actor_jid):
    """Describe a group metadata event stored in ``thumb_image``.

    Returns the text to display, or ``None`` when the blob is not one of the
    recognised event kinds (member added / number changed).
    """
    if _ADDED_MARKER in thumb_image:
        found = _JID_RE.search(thumb_image)
        if found is None:
            return None
        added = found[0].decode("ascii")
        if added in data:
            # A known JID without a name is rendered as 'You' below.
            name_right = data[added]["name"]
        else:
            name_right = added.split('@')[0]
        if actor_jid is None:
            return f"{{Added {name_right or 'You'}}}"
        known = data[actor_jid]["name"] if actor_jid in data else None
        name_left = known or actor_jid.split('@')[0]
        return f"{{{name_left} added {name_right or 'You'}}}"
    if _NUMBER_CHANGED_MARKER in thumb_image:
        if actor_jid is None:
            return None
        original = actor_jid.split('@')[0]
        changed = thumb_image[7:].decode(errors="replace").split('@')[0]
        return f"{{{original} changed to {changed}}}"
    return None


def messages(db, data):
    """Load every row of the Android ``messages`` table into ``data``.

    Args:
        db: Open ``sqlite3`` connection to the message database.
        data: Mapping ``jid -> {"name": ..., "messages": {row_id: message}}``,
            updated in place. Chats not yet present are created with a
            ``None`` name.

    Each message dict carries ``from_me``, ``timestamp`` (seconds), ``time``
    (local ``HH:MM``), ``media``, ``key_id``, ``sender``, ``reply``,
    ``caption`` and usually ``data``; ``quoted_data`` is present only for
    replies. Rows with status 6 are treated as metadata: some are turned
    into a descriptive text, some are dropped, and unrecognised ones are kept
    without a ``data`` key.
    """
    c = db.cursor()
    c.execute("""SELECT count() FROM messages""")
    total_row_number = c.fetchone()[0]
    print(f"Gathering messages...(0/{total_row_number})", end="\r")

    c.execute("""SELECT messages.key_remote_jid,
                        messages._id,
                        messages.key_from_me,
                        messages.timestamp,
                        messages.data,
                        messages.status,
                        messages.edit_version,
                        messages.thumb_image,
                        messages.remote_resource,
                        messages.media_wa_type,
                        messages.latitude,
                        messages.longitude,
                        messages_quotes.key_id as quoted,
                        messages.key_id,
                        messages_quotes.data,
                        messages.media_caption
                 FROM messages
                    LEFT JOIN messages_quotes
                        ON messages.quoted_row_id = messages_quotes._id;""")
    for i, content in enumerate(c, start=1):
        (jid, row_id, from_me, timestamp, text, status, edit_version,
         thumb_image, remote_resource, media_wa_type, latitude, longitude,
         quoted_key_id, key_id, quoted_text, caption) = content

        chat = data.setdefault(jid, {"name": None, "messages": {}})
        seconds = timestamp / 1000
        message = {
            "from_me": bool(from_me),
            "timestamp": seconds,
            "time": datetime.fromtimestamp(seconds).strftime("%H:%M"),
            "media": False,
            "key_id": key_id,
            "sender": None,
        }
        chat["messages"][row_id] = message

        # A "-" in the chat JID is how this module recognises group chats.
        is_group = "-" in jid
        if is_group and from_me == 0 and remote_resource is not None:
            known = data[remote_resource]["name"] if remote_resource in data else None
            number = remote_resource.split('@')[0] if "@" in remote_resource else None
            # Fall back to the number even for senders that are not contacts.
            message["sender"] = known or number

        message["reply"] = quoted_key_id
        if quoted_key_id is not None:
            message["quoted_data"] = quoted_text
        message["caption"] = caption

        if status == 6:
            # Metadata row rather than a user-written message.
            if not is_group:
                # Private chat
                if text is None and thumb_image is None:
                    del chat["messages"][row_id]
            elif text is not None:
                try:
                    int(text)
                except ValueError:
                    message["data"] = f"{{The group name changed to {text} }}"
                else:
                    # Purely numeric payloads are not shown.
                    del chat["messages"][row_id]
            elif thumb_image is None:
                del chat["messages"][row_id]
            else:
                event = _group_event_text(data, thumb_image, remote_resource)
                # Unrecognised blobs leave "data" unset instead of reusing
                # the text of whichever message was processed before.
                if event is not None:
                    message["data"] = event
        else:
            # The status that accompanies a deleted message depends on who
            # sent it.
            deleted_status = 5 if from_me == 1 else 0
            if status == deleted_status and edit_version == 7:
                msg = "{Message deleted}"
            elif media_wa_type == "5":
                msg = f"{{ Location shared: {(latitude, longitude)} }}"
            else:
                msg = text
            message["data"] = msg

        if i % 1000 == 0:
            print(f"Gathering messages...({i}/{total_row_number})", end="\r")
    print(
        f"Gathering messages...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def media(db, data, media_folder):
    """Attach media information to messages already collected in ``data``.

    Args:
        db: Open ``sqlite3`` connection to the Android message database.
        data: Chat mapping filled by ``messages()``; updated in place.
        media_folder: Folder that the ``file_path`` column is relative to.

    For each ``message_media`` row the matching message gets ``media=True``
    plus ``data``/``mime``: the file path and its MIME type (stored value, a
    guess from the file name, or ``"data/data"``) when the file exists, or a
    "missing" placeholder otherwise. Rows whose message is not present in
    ``data`` (for example because ``messages()`` dropped it) are skipped.
    """
    c = db.cursor()
    c.execute("""SELECT count() FROM message_media""")
    total_row_number = c.fetchone()[0]
    print(f"\nGathering media...(0/{total_row_number})", end="\r")
    c.execute("""SELECT messages.key_remote_jid,
                        message_row_id,
                        file_path,
                        message_url,
                        mime_type,
                        media_key
                 FROM message_media
                    INNER JOIN messages
                        ON message_media.message_row_id = messages._id
                ORDER BY messages.key_remote_jid ASC""")
    mime = MimeTypes()
    for i, content in enumerate(c, start=1):
        jid, row_id, relative_path, _url, mime_type, _media_key = content
        message = data.get(jid, {}).get("messages", {}).get(row_id)
        if message is not None:
            file_path = f"{media_folder}/{relative_path}"
            message["media"] = True
            if os.path.isfile(file_path):
                message["data"] = file_path
                if mime_type is None:
                    guess = mime.guess_type(file_path)[0]
                    message["mime"] = guess if guess is not None else "data/data"
                else:
                    message["mime"] = mime_type
            else:
                message["data"] = "{The media is missing}"
                message["mime"] = "media"
        if i % 100 == 0:
            print(f"Gathering media...({i}/{total_row_number})", end="\r")
    print(
        f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def vcard(db, data, media_folder="WhatsApp"):
    """Write shared contact cards to ``.vcf`` files and reference them in ``data``.

    Args:
        db: Open ``sqlite3`` connection to the Android message database.
        data: Chat mapping filled by ``messages()``; updated in place.
        media_folder: Folder under which the ``vCards`` directory is created.
            Defaults to ``"WhatsApp"``.

    Each card is written once to ``<media_folder>/vCards/<name>.vcf`` (the
    name keeps only alphanumeric characters, at most 200 of them, and falls
    back to ``vcard<row id>`` when nothing is left). The matching message,
    if still present in ``data``, gets a descriptive ``data`` text and the
    ``text/x-vcard`` MIME type.
    """
    c = db.cursor()
    c.execute("""SELECT message_row_id,
                        messages.key_remote_jid,
                        vcard,
                        messages.media_name
                 FROM messages_vcards
                    INNER JOIN messages
                        ON messages_vcards.message_row_id = messages._id
                 ORDER BY messages.key_remote_jid ASC;""")
    rows = c.fetchall()
    total_row_number = len(rows)
    print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
    base = f"{media_folder}/vCards"
    if rows:
        # Also creates the media folder itself when it does not exist yet.
        os.makedirs(base, exist_ok=True)
    for index, (row_id, jid, vcard_text, media_name) in enumerate(rows):
        display_name = media_name or ""
        file_name = "".join(x for x in display_name if x.isalnum())[:200]
        if not file_name:
            file_name = f"vcard{row_id}"
        file_path = f"{base}/{file_name}.vcf"
        if not os.path.isfile(file_path):
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(vcard_text or "")
        message = data.get(jid, {}).get("messages", {}).get(row_id)
        if message is not None:
            message["data"] = display_name + \
                "{ The vCard file cannot be displayed here, however it " \
                "should be located at " + file_path + "}"
            message["mime"] = "text/x-vcard"
        print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def create_html(data, output_folder):
    """Render one HTML page per non-empty chat into ``output_folder``.

    Args:
        data: Mapping ``jid -> {"name": ..., "messages": {...}}``.
        output_folder: Destination directory; created when missing.

    The ``whatsapp.html`` template is loaded from the directory of this
    module. File names are built from the phone number (non-group chats
    only) and the chat name, keeping only alphanumerics, ``-`` and spaces.
    When that leaves nothing (e.g. an unnamed group) the identifier part of
    the JID is used so that such chats no longer share a single ``.html``.
    """
    templateLoader = jinja2.FileSystemLoader(searchpath=os.path.dirname(__file__))
    templateEnv = jinja2.Environment(loader=templateLoader)
    templateEnv.globals.update(determine_day=determine_day)
    TEMPLATE_FILE = "whatsapp.html"
    template = templateEnv.get_template(TEMPLATE_FILE)

    total_row_number = len(data)
    print(f"\nCreating HTML...(0/{total_row_number})", end="\r")

    os.makedirs(output_folder, exist_ok=True)

    def sanitize(text):
        return "".join(x for x in text if x.isalnum() or x in "- ")

    for current, (contact, chat) in enumerate(data.items()):
        if not chat["messages"]:
            continue
        phone_number = contact.split('@')[0]
        parts = []
        # Group JIDs (containing "-") do not contribute to the file name.
        if "-" not in contact and phone_number:
            parts.append(phone_number)
        if chat["name"] is not None:
            parts.append(chat["name"].replace("/", "-"))
            name = chat["name"]
        else:
            name = phone_number

        safe_file_name = sanitize("-".join(parts)) or sanitize(phone_number)
        with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
            f.write(
                template.render(
                    name=name,
                    msgs=chat["messages"].values(),
                    my_avatar=None,
                    their_avatar=f"WhatsApp/Avatars/{contact}.j"
                )
            )
        if current % 10 == 0:
            print(f"Creating HTML...({current}/{total_row_number})", end="\r")

    print(f"Creating HTML...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command line: [options] [MESSAGE_DB [OUTPUT_FOLDER]]
    from contextlib import closing
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option(
        "-w",
        "--wa",
        dest="wa",
        default="wa.db",
        help="Path to contact database")
    parser.add_option(
        "-m",
        "--media",
        dest="media",
        default="WhatsApp",
        help="Path to WhatsApp media folder"
    )
    # TODO: a "-t/--template" option for a custom HTML template was sketched
    # here but never enabled.
    (options, args) = parser.parse_args()
    msg_db = "msgstore.db"
    output_folder = "temp"
    contact_db = options.wa
    media_folder = options.media

    if len(args) == 1:
        msg_db = args[0]
    elif len(args) == 2:
        msg_db = args[0]
        output_folder = args[1]

    data = {}

    # ``with sqlite3.connect(...)`` only manages the transaction, so the
    # connections are wrapped in ``closing`` to actually release the files.
    if os.path.isfile(contact_db):
        with closing(sqlite3.connect(contact_db)) as db:
            contacts(db, data)
    if os.path.isfile(msg_db):
        with closing(sqlite3.connect(msg_db)) as db:
            messages(db, data)
            media(db, data, media_folder)
            vcard(db, data)
        create_html(data, output_folder)

    # Move the media folder next to the generated pages, unless it is absent
    # or a folder of the same name is already there (e.g. from an earlier run).
    media_name = os.path.basename(os.path.normpath(media_folder))
    if os.path.isdir(media_folder) and \
            not os.path.isdir(os.path.join(output_folder, media_name)):
        os.makedirs(output_folder, exist_ok=True)
        shutil.move(media_folder, f"{output_folder}/")

    serialized = json.dumps(data)
    print(f"\nWriting JSON file...({int(len(serialized)/1024/1024)}MB)")
    with open("result.json", "w", encoding="utf-8") as f:
        f.write(serialized)

    print("Everything is done!")
|
||||
307
Whatsapp_Chat_Exporter/extract_iphone.py
Normal file
307
Whatsapp_Chat_Exporter/extract_iphone.py
Normal file
@@ -0,0 +1,307 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import sqlite3
|
||||
import json
|
||||
import jinja2
|
||||
import os
|
||||
import requests
|
||||
import shutil
|
||||
import pkgutil
|
||||
from datetime import datetime
|
||||
from mimetypes import MimeTypes
|
||||
|
||||
# Unix timestamp of 2001-01-01 00:00:00 UTC, the reference date that Apple
# timestamps count from. Written as a constant because a naive
# ``datetime(2001, 1, 1).timestamp()`` is interpreted in the local time zone
# and would shift every message time by the machine's UTC offset.
APPLE_TIME = 978307200.0
|
||||
|
||||
|
||||
def determine_day(last, current):
    """Tell whether ``current`` falls on a different calendar day than ``last``.

    Both arguments are Unix timestamps in seconds; they are converted with
    ``datetime.fromtimestamp`` and therefore compared as local dates.

    Returns:
        The ``date`` of ``current`` when the two days differ, otherwise
        ``None``.
    """
    previous_day, this_day = (
        datetime.fromtimestamp(stamp).date() for stamp in (last, current)
    )
    return this_day if this_day != previous_day else None
|
||||
|
||||
|
||||
def messages(db, data):
    """Load chat sessions and messages from the iPhone database into ``data``.

    Args:
        db: Open ``sqlite3`` connection to the iPhone message database.
        data: Mapping updated in place. Every ``ZWACHATSESSION`` row becomes
            ``jid -> {"name": partner name, "messages": {}}``; every
            ``ZWAMESSAGE`` row is then stored under its chat, keyed by
            ``Z_PK``.

    Message type 6 is handled as metadata: in a group chat a non-numeric
    text becomes a "group name changed" notice and a numeric one drops the
    row; every other metadata row gets ``data = None``. Type 14 is shown as
    a deleted message.
    """
    cursor = db.cursor()

    # Chat sessions provide the contact names.
    cursor.execute("""SELECT count() FROM ZWACHATSESSION""")
    (session_count,) = cursor.fetchone()
    print(f"Gathering contacts...({session_count})")

    cursor.execute("""SELECT ZCONTACTJID, ZPARTNERNAME FROM ZWACHATSESSION; """)
    for jid, partner_name in cursor.fetchall():
        data[jid] = {"name": partner_name, "messages": {}}

    # Message history
    cursor.execute("""SELECT count() FROM ZWAMESSAGE""")
    (message_count,) = cursor.fetchone()
    print(f"Gathering messages...(0/{message_count})", end="\r")

    cursor.execute("""SELECT COALESCE(ZFROMJID, ZTOJID),
                        ZWAMESSAGE.Z_PK,
                        ZISFROMME,
                        ZMESSAGEDATE,
                        ZTEXT,
                        ZMESSAGETYPE,
                        ZWAGROUPMEMBER.ZMEMBERJID
                 FROM main.ZWAMESSAGE
                    LEFT JOIN main.ZWAGROUPMEMBER
                        ON main.ZWAMESSAGE.ZGROUPMEMBER = main.ZWAGROUPMEMBER.Z_PK;""")
    for processed, record in enumerate(cursor, start=1):
        jid, row_id, from_me, apple_seconds, text, message_type, member_jid = record
        chat = data.setdefault(jid, {"name": None, "messages": {}})
        ts = APPLE_TIME + apple_seconds
        entry = {
            "from_me": bool(from_me),
            "timestamp": ts,
            "time": datetime.fromtimestamp(ts).strftime("%H:%M"),
            "media": False,
            "reply": None,
            "caption": None
        }
        chat["messages"][row_id] = entry

        # Only incoming group messages carry an individual sender.
        sender = None
        if "-" in jid and from_me == 0 and member_jid is not None:
            known = data[member_jid]["name"] if member_jid in data else None
            number = member_jid.split('@')[0] if "@" in member_jid else None
            sender = known or number
        entry["sender"] = sender

        if message_type != 6:
            # Real message
            entry["data"] = "{Message deleted}" if message_type == 14 else text
        elif "-" in jid and text is not None:
            # Group metadata with a payload: a changed name unless numeric.
            try:
                int(text)
            except ValueError:
                entry["data"] = f"{{The group name changed to {text} }}"
            else:
                del chat["messages"][row_id]
        else:
            entry["data"] = None

        if processed % 1000 == 0:
            print(f"Gathering messages...({processed}/{message_count})", end="\r")
    print(
        f"Gathering messages...({message_count}/{message_count})", end="\r")
|
||||
|
||||
|
||||
def media(db, data, media_folder):
    """Attach media information to messages already collected in ``data``.

    Args:
        db: Open ``sqlite3`` connection to the iPhone message database.
        data: Chat mapping filled by ``messages()``; updated in place.
        media_folder: Folder that ``ZMEDIALOCALPATH`` is relative to. It is
            now honoured instead of a hard-coded ``Message`` folder.

    Only media items with a local path are considered. The matching message
    gets ``media=True`` plus ``data``/``mime`` (file path and type when the
    file exists, a "missing" placeholder otherwise) and, when ``ZTITLE`` is
    set, a ``caption``. Rows whose message is not present in ``data`` are
    skipped.
    """
    c = db.cursor()
    c.execute("""SELECT count() FROM ZWAMEDIAITEM""")
    total_row_number = c.fetchone()[0]
    print(f"\nGathering media...(0/{total_row_number})", end="\r")
    c.execute("""SELECT COALESCE(ZWAMESSAGE.ZFROMJID, ZWAMESSAGE.ZTOJID) as _id,
                        ZMESSAGE,
                        ZMEDIALOCALPATH,
                        ZMEDIAURL,
                        ZVCARDSTRING,
                        ZMEDIAKEY,
                        ZTITLE
                 FROM ZWAMEDIAITEM
                    INNER JOIN ZWAMESSAGE
                        ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK
                 WHERE ZMEDIALOCALPATH IS NOT NULL
                 ORDER BY _id ASC""")
    mime = MimeTypes()
    for i, content in enumerate(c, start=1):
        jid, row_id, local_path, _url, stored_type, _media_key, title = content
        message = data.get(jid, {}).get("messages", {}).get(row_id)
        if message is not None:
            file_path = f"{media_folder}/{local_path}"
            message["media"] = True
            if os.path.isfile(file_path):
                message["data"] = file_path
                if stored_type is None:
                    guess = mime.guess_type(file_path)[0]
                    message["mime"] = guess if guess is not None else "data/data"
                else:
                    # NOTE(review): ZVCARDSTRING is used as the MIME type
                    # here, as in the original code - presumably the schema
                    # stores it there for media rows; confirm.
                    message["mime"] = stored_type
            else:
                message["data"] = "{The media is missing}"
                message["mime"] = "media"
            if title is not None:
                message["caption"] = title
        if i % 100 == 0:
            print(f"Gathering media...({i}/{total_row_number})", end="\r")
    print(
        f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def vcard(db, data, media_folder="Message"):
    """Write shared contact cards to ``.vcf`` files and reference them in ``data``.

    Args:
        db: Open ``sqlite3`` connection to the iPhone message database.
        data: Chat mapping filled by ``messages()``; updated in place.
        media_folder: Folder under which the ``vCards`` directory is created.
            Defaults to ``"Message"``.

    Each card is written once to ``<media_folder>/vCards/<name>.vcf`` (the
    name keeps only alphanumeric characters, at most 200 of them, and falls
    back to ``vcard<message id>`` when nothing is left). The matching
    message, if still present in ``data``, gets a descriptive ``data`` text,
    the ``text/x-vcard`` MIME type and ``media=True``.
    """
    c = db.cursor()
    c.execute("""SELECT DISTINCT ZWAVCARDMENTION.ZMEDIAITEM,
                        ZWAMEDIAITEM.ZMESSAGE,
                        COALESCE(ZWAMESSAGE.ZFROMJID,
                                 ZWAMESSAGE.ZTOJID) as _id,
                        ZVCARDNAME,
                        ZVCARDSTRING
                 FROM ZWAVCARDMENTION
                    INNER JOIN ZWAMEDIAITEM
                        ON ZWAVCARDMENTION.ZMEDIAITEM = ZWAMEDIAITEM.Z_PK
                    INNER JOIN ZWAMESSAGE
                        ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK""")
    rows = c.fetchall()
    total_row_number = len(rows)
    print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
    base = f"{media_folder}/vCards"
    if rows:
        # Also creates the media folder itself when it does not exist yet.
        os.makedirs(base, exist_ok=True)
    for index, (_item_id, row_id, jid, card_name, card_text) in enumerate(rows):
        display_name = card_name or ""
        file_name = "".join(x for x in display_name if x.isalnum())[:200]
        if not file_name:
            file_name = f"vcard{row_id}"
        file_path = f"{base}/{file_name}.vcf"
        if not os.path.isfile(file_path):
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(card_text or "")
        message = data.get(jid, {}).get("messages", {}).get(row_id)
        if message is not None:
            message["data"] = display_name + \
                "{ The vCard file cannot be displayed here, however it " \
                "should be located at " + file_path + "}"
            message["mime"] = "text/x-vcard"
            message["media"] = True
        print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def create_html(data, output_folder):
    """Render one HTML page per non-empty chat into ``output_folder``.

    Args:
        data: Mapping ``jid -> {"name": ..., "messages": {...}}``.
        output_folder: Destination directory; created when missing.

    The ``whatsapp.html`` template is loaded from the directory of this
    module. File names are built from the phone number (non-group chats
    only) and the chat name, keeping only alphanumerics, ``-`` and spaces.
    When that leaves nothing (e.g. an unnamed group) the identifier part of
    the JID is used so that such chats no longer share a single ``.html``.
    """
    templateLoader = jinja2.FileSystemLoader(searchpath=os.path.dirname(__file__))
    templateEnv = jinja2.Environment(loader=templateLoader)
    templateEnv.globals.update(determine_day=determine_day)
    TEMPLATE_FILE = "whatsapp.html"
    template = templateEnv.get_template(TEMPLATE_FILE)

    total_row_number = len(data)
    print(f"\nCreating HTML...(0/{total_row_number})", end="\r")

    os.makedirs(output_folder, exist_ok=True)

    def sanitize(text):
        return "".join(x for x in text if x.isalnum() or x in "- ")

    for current, (contact, chat) in enumerate(data.items()):
        if not chat["messages"]:
            continue
        phone_number = contact.split('@')[0]
        parts = []
        # Group JIDs (containing "-") do not contribute to the file name.
        if "-" not in contact and phone_number:
            parts.append(phone_number)
        if chat["name"] is not None:
            parts.append(chat["name"].replace("/", "-"))
            name = chat["name"]
        else:
            name = phone_number

        safe_file_name = sanitize("-".join(parts)) or sanitize(phone_number)
        with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
            f.write(
                template.render(
                    name=name,
                    msgs=chat["messages"].values(),
                    my_avatar=None,
                    their_avatar=f"WhatsApp/Avatars/{contact}.j"
                )
            )
        if current % 10 == 0:
            print(f"Creating HTML...({current}/{total_row_number})", end="\r")

    print(f"Creating HTML...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command line: [options] [MESSAGE_DB [OUTPUT_FOLDER]]
    from contextlib import closing
    from optparse import OptionParser
    parser = OptionParser()
    # "-w/--wa" is still accepted, but its value is not used below: the
    # contact names are read from the message database by messages().
    parser.add_option(
        "-w",
        "--wa",
        dest="wa",
        default="wa.db",
        help="Path to contact database")
    parser.add_option(
        "-m",
        "--media",
        dest="media",
        default="Message",
        help="Path to WhatsApp media folder"
    )
    # TODO: a "-t/--template" option for a custom HTML template was sketched
    # here but never enabled.
    (options, args) = parser.parse_args()
    msg_db = "7c7fba66680ef796b916b067077cc246adacf01d"
    output_folder = "temp"
    media_folder = options.media

    if len(args) == 1:
        msg_db = args[0]
    elif len(args) == 2:
        msg_db = args[0]
        output_folder = args[1]

    data = {}

    # ``with sqlite3.connect(...)`` only manages the transaction, so the
    # connection is wrapped in ``closing`` to actually release the file.
    if os.path.isfile(msg_db):
        with closing(sqlite3.connect(msg_db)) as db:
            messages(db, data)
            media(db, data, media_folder)
            vcard(db, data)
        create_html(data, output_folder)

    # Move the media folder next to the generated pages, unless it is absent
    # or a folder of the same name is already there (e.g. from an earlier
    # run). The check uses the real folder name rather than "WhatsApp".
    media_name = os.path.basename(os.path.normpath(media_folder))
    if os.path.isdir(media_folder) and \
            not os.path.isdir(os.path.join(output_folder, media_name)):
        os.makedirs(output_folder, exist_ok=True)
        shutil.move(media_folder, f"{output_folder}/")

    serialized = json.dumps(data)
    print(f"\nWriting JSON file...({int(len(serialized)/1024/1024)}MB)")
    with open("result.json", "w", encoding="utf-8") as f:
        f.write(serialized)

    print("Everything is done!")
|
||||
128
Whatsapp_Chat_Exporter/extract_iphone_media.py
Normal file
128
Whatsapp_Chat_Exporter/extract_iphone_media.py
Normal file
@@ -0,0 +1,128 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import shutil
|
||||
import sqlite3
|
||||
import os
|
||||
import getpass
|
||||
# Encrypted backups need the optional ``iphone_backup_decrypt`` package.
# Only import failures are treated as "not supported"; anything else
# (e.g. KeyboardInterrupt) is no longer swallowed.
try:
    from iphone_backup_decrypt import EncryptedBackup, RelativePath
except ImportError:
    support_encrypted = False
else:
    support_encrypted = True
|
||||
|
||||
def extract_encrypted(base_dir, password):
    """Decrypt the WhatsApp databases and media from an encrypted backup.

    Args:
        base_dir: Directory of the iPhone backup.
        password: Passphrase of the backup.

    The message database is written to
    ``7c7fba66680ef796b916b067077cc246adacf01d`` and the contact database to
    ``ContactsV2.sqlite`` in the current directory. Every manifest entry
    under ``Message/Media/`` is recreated at its relative path: entries with
    flag 2 as directories, entries with flag 1 as decrypted files.
    """
    backup = EncryptedBackup(backup_directory=base_dir, passphrase=password)
    print("Decrypting WhatsApp database...")
    backup.extract_file(relative_path=RelativePath.WHATSAPP_MESSAGES,
                        output_filename="7c7fba66680ef796b916b067077cc246adacf01d")
    backup.extract_file(relative_path=RelativePath.WHATSAPP_CONTACTS,
                        output_filename="ContactsV2.sqlite")
    data = backup.execute_sql("""SELECT count()
                FROM Files
                WHERE relativePath
                    LIKE 'Message/Media/%'"""
    )
    total_row_number = data[0][0]
    print(f"Gathering media...(0/{total_row_number})", end="\r")
    data = backup.execute_sql("""SELECT fileID,
                        relativePath,
                        flags,
                        file
                FROM Files
                WHERE relativePath
                    LIKE 'Message/Media/%'"""
    )
    os.makedirs("Message/Media", exist_ok=True)
    for i, (file_id, destination, flags, file_bplist) in enumerate(data, start=1):
        if flags == 2:
            # makedirs tolerates existing directories and rows that arrive
            # before their parent directory's row.
            os.makedirs(destination, exist_ok=True)
        elif flags == 1:
            decrypted = backup.decrypt_inner_file(file_id=file_id, file_bplist=file_bplist)
            os.makedirs(os.path.dirname(destination), exist_ok=True)
            with open(destination, "wb") as f:
                f.write(decrypted)
        if i % 100 == 0:
            print(f"Gathering media...({i}/{total_row_number})", end="\r")
    print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
def is_encrypted(base_dir):
    """Report whether the backup in ``base_dir`` looks encrypted.

    The check queries the ``Files`` table of ``<base_dir>/Manifest.db``.

    Returns:
        ``True`` when the query raises ``sqlite3.DatabaseError`` (the file
        cannot be read as a database containing that table), ``False`` when
        it succeeds.

    NOTE: ``sqlite3.connect`` creates the file when it does not exist, so a
    directory without ``Manifest.db`` is also reported as encrypted.
    """
    connection = sqlite3.connect(f"{base_dir}/Manifest.db")
    try:
        connection.execute("""SELECT count()
                    FROM Files
                    """)
    except sqlite3.DatabaseError:
        return True
    else:
        return False
    finally:
        # The connection's own context manager would not close it.
        connection.close()
|
||||
|
||||
def extract_media(base_dir):
    """Extract the WhatsApp database and media from an iPhone backup.

    Args:
        base_dir: Directory of the iPhone backup.

    Encrypted backups are handed to ``extract_encrypted`` after prompting
    for the password. For plain backups the message database is copied to
    ``7c7fba66680ef796b916b067077cc246adacf01d`` in the current directory
    and every manifest entry under ``Message/Media/`` is recreated at its
    relative path.

    Returns:
        ``False`` when the backup is encrypted but the decryption dependency
        is unavailable; ``None`` otherwise.

    Raises:
        SystemExit: With status 1 when the WhatsApp database is not found in
            a plain backup.
    """
    # Imported locally: this module's import block does not provide ``sys``.
    import sys

    if is_encrypted(base_dir):
        if not support_encrypted:
            print("You don't have the dependencies to handle encrypted backup.")
            print("Read more about how to deal with encrypted backup:")
            print("https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage")
            return False
        password = getpass.getpass("Enter the password:")
        extract_encrypted(base_dir, password)
        return None

    wts_db = os.path.join(base_dir, "7c/7c7fba66680ef796b916b067077cc246adacf01d")
    if not os.path.isfile(wts_db):
        print("WhatsApp database not found.")
        sys.exit(1)
    shutil.copyfile(wts_db, "7c7fba66680ef796b916b067077cc246adacf01d")

    manifest = sqlite3.connect(f"{base_dir}/Manifest.db")
    try:
        c = manifest.cursor()
        c.execute("""SELECT count()
                    FROM Files
                    WHERE relativePath
                        LIKE 'Message/Media/%'""")
        total_row_number = c.fetchone()[0]
        print(f"Gathering media...(0/{total_row_number})", end="\r")
        c.execute("""SELECT fileID,
                            relativePath,
                            flags
                    FROM Files
                    WHERE relativePath
                        LIKE 'Message/Media/%'""")
        os.makedirs("Message/Media", exist_ok=True)
        for i, (hashes, destination, flags) in enumerate(c, start=1):
            if flags == 2:
                # makedirs tolerates reruns and rows that arrive before
                # their parent directory's row.
                os.makedirs(destination, exist_ok=True)
            elif flags == 1:
                # Backup files live in a sub-folder named after the first
                # two characters of their file ID.
                folder = hashes[:2]
                os.makedirs(os.path.dirname(destination), exist_ok=True)
                shutil.copyfile(f"{base_dir}/{folder}/{hashes}", destination)
            if i % 100 == 0:
                print(f"Gathering media...({i}/{total_row_number})", end="\r")
        print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
    finally:
        manifest.close()
    return None
|
||||
|
||||
|
||||
if __name__ == "__main__":
    from optparse import OptionParser
    parser = OptionParser(usage="usage: %prog BACKUP_DIRECTORY")
    (_, args) = parser.parse_args()
    if not args:
        # Print the usage text and exit instead of failing with IndexError.
        parser.error("the path to the iPhone backup directory is required")
    base_dir = args[0]
    extract_media(base_dir)
|
||||
158
Whatsapp_Chat_Exporter/whatsapp.html
Normal file
158
Whatsapp_Chat_Exporter/whatsapp.html
Normal file
@@ -0,0 +1,158 @@
|
||||
<!DOCTYPE html>
{#-
    Chat page template.
    Context: name, msgs (iterable of message dicts), my_avatar, their_avatar,
    and the global determine_day(last, current).
    Every value that comes from the chat database is passed through the
    "e" (escape) filter so message content cannot inject markup.
-#}
<html>
<head>
    <meta charset="utf-8">
    <title>Whatsapp - {{ name | e }}</title>
    <link rel="stylesheet" href="https://www.w3schools.com/w3css/4/w3.css">
    <style>
        @import url('https://fonts.googleapis.com/css2?family=Noto+Sans+HK:wght@300;400&display=swap');
        html {
            font-family: 'Noto Sans HK', sans-serif;
            font-size: 12px;
            scroll-behavior: smooth;
        }
        header {
            position: fixed;
            z-index: 20;
            border-bottom: 2px solid #e3e6e7;
            font-size: 2em;
            font-weight: bolder;
            background-color: white;
            padding: 20px 0 20px 0;
        }
        footer {
            border-top: 2px solid #e3e6e7;
            font-size: 2em;
            padding: 20px 0 20px 0;
        }
        article {
            width:500px;
            margin:100px auto;
            z-index:10;
            font-size: 15px;
            word-wrap: break-word;
        }
        img, video {
            max-width:100%;
        }
        a.anchor {
            display: block;
            position: relative;
            top: -100px;
            visibility: hidden;
        }
        div.reply{
            font-size: 13px;
            text-decoration: none;
        }
    </style>
</head>
<body>
    <header class="w3-center w3-top">Chat history with {{ name | e }}</header>
    <article class="w3-container">
        <div class="table" style="width:100%">
            {# "last" holds the timestamp of the most recent day separator. #}
            {% set last = {'last': 946688461.001} %}
            {% for msg in msgs -%}
            <div class="w3-row" style="padding-bottom: 10px">
                <a class="anchor" id="{{ msg.key_id | e }}"></a>
                {% if determine_day(last.last, msg.timestamp) is not none %}
                <div class="w3-center" style="color:#70777c;padding: 10px 0 10px 0;">{{ determine_day(last.last, msg.timestamp) }}</div>
                {% if last.update({'last': msg.timestamp}) %}{% endif %}
                {% endif %}
                {% if msg.from_me == true %}
                <div class="w3-row">
                    <div style="float: left; color:#70777c;">{{ msg.time }}</div>
                    <div style="padding-left: 10px; text-align: right; color: #3892da;">You</div>
                </div>
                <div class="w3-row">
                    <div class="w3-col m10 l10">
                        <div style="text-align: right;">
                            {% if msg.reply is not none %}
                            <div class="reply">
                                <span style="color: #70777a;">Replying to </span>
                                <a href="#{{ msg.reply | e }}" style="color: #168acc;">"{{ (msg.quoted_data or 'media') | e }}"</a>
                            </div>
                            {% endif %}
                            {% if msg.media == false %}
                                {# Escape first, then turn line breaks into <br>. #}
                                {{ (msg.data or "{This message is not supported yet}") | e | replace('\n', '<br>') }}
                            {% else %}
                                {% if "image/" in msg.mime %}
                                    <a href="{{ msg.data | e }}"><img src="{{ msg.data | e }}" /></a>
                                {% elif "audio/" in msg.mime %}
                                    <audio controls="controls" autobuffer="autobuffer">
                                        <source src="{{ msg.data | e }}" />
                                    </audio>
                                {% elif "video/" in msg.mime %}
                                    <video controls="controls" autobuffer="autobuffer">
                                        <source src="{{ msg.data | e }}" />
                                    </video>
                                {% elif "/" in msg.mime %}
                                    {The file cannot be displayed here, however it should be located at {{ msg.data | e }}}
                                {% else %}
                                    {% filter escape %}{{ msg.data }}{% endfilter %}
                                {% endif %}
                                {% if msg.caption is not none %}
                                    <br>
                                    {{ msg.caption | e }}
                                {% endif %}
                            {% endif %}
                        </div>
                    </div>
                    <div class="w3-col m2 l2" style="padding-left: 10px"><img src="{{ my_avatar }}" onerror="this.style.display='none'"></div>
                </div>
                {% else %}
                <div class="w3-row">
                    <div style="padding-right: 10px; float: left; color: #3892da;">
                        {% if msg.sender is not none %}
                            {{ msg.sender | e }}
                        {% else %}
                            {{ name | e }}
                        {% endif %}
                    </div>
                    <div style="text-align: right; color:#70777c;">{{ msg.time }}</div>
                </div>
                <div class="w3-row">
                    <div class="w3-col m2 l2"><img src="{{ their_avatar | e }}" onerror="this.style.display='none'"></div>
                    <div class="w3-col m10 l10">
                        <div style="text-align: left;">
                            {% if msg.reply is not none %}
                            <div class="reply">
                                <span style="color: #70777a;">Replying to </span>
                                <a href="#{{ msg.reply | e }}" style="color: #168acc;">"{{ (msg.quoted_data or 'media') | e }}"</a>
                            </div>
                            {% endif %}
                            {% if msg.media == false %}
                                {# Escape first, then turn line breaks into <br>. #}
                                {{ (msg.data or "{This message is not supported yet}") | e | replace('\n', '<br>') }}
                            {% else %}
                                {% if "image/" in msg.mime %}
                                    <a href="{{ msg.data | e }}"><img src="{{ msg.data | e }}" /></a>
                                {% elif "audio/" in msg.mime %}
                                    <audio controls="controls" autobuffer="autobuffer">
                                        <source src="{{ msg.data | e }}" />
                                    </audio>
                                {% elif "video/" in msg.mime %}
                                    <video controls="controls" autobuffer="autobuffer">
                                        <source src="{{ msg.data | e }}" />
                                    </video>
                                {% elif "/" in msg.mime %}
                                    {The file cannot be displayed here, however it should be located at {{ msg.data | e }}}
                                {% else %}
                                    {% filter escape %}{{ msg.data }}{% endfilter %}
                                {% endif %}
                                {% if msg.caption is not none %}
                                    <br>
                                    {{ msg.caption | e }}
                                {% endif %}
                            {% endif %}
                        </div>
                    </div>
                </div>
                {% endif %}
            </div>
            {% endfor %}
        </div>
    </article>
    <footer class="w3-center">
        End of history
    </footer>
</body>
</html>
|
||||
Reference in New Issue
Block a user