diff --git a/extract.py b/extract.py index 9f02bd9..3bb2f1a 100644 --- a/extract.py +++ b/extract.py @@ -8,6 +8,7 @@ import os import base64 import requests import shutil +import re from datetime import datetime from mimetypes import MimeTypes @@ -43,7 +44,8 @@ c.execute("""SELECT count() FROM messages""") total_row_number = c.fetchone()[0] print(f"Gathering messages...(0/{total_row_number})", end="\r") -c.execute("""SELECT key_remote_jid, _id, key_from_me, timestamp, data FROM messages; """) +phone_number_re = re.compile(r"[0-9]+@s.whatsapp.net") +c.execute("""SELECT key_remote_jid, _id, key_from_me, timestamp, data, status, edit_version, thumb_image, remote_resource, media_wa_type, latitude, longitude FROM messages; """) i = 0 content = c.fetchone() while content is not None: @@ -53,9 +55,76 @@ while content is not None: "from_me": bool(content[2]), "timestamp": content[3]/1000, "time": datetime.fromtimestamp(content[3]/1000).strftime("%H:%M"), - "data": content[4], "media": False } + if "-" in content[0] and content[2] == 0: + if content[8] in data: + name = data[content[8]]["name"] + else: + name = None + data[content[0]]["messages"][content[1]]["sender"] = name or content[8].split('@')[0] + else: + data[content[0]]["messages"][content[1]]["sender"] = None + + if content[5] == 6: + if "-" in content[0]: + # Is Group + if content[4] is not None: + try: + int(content[4]) + except: + data[content[0]]["messages"][content[1]]["data"] = "{The group name changed to "f"{content[4]}"" }" + else: + del data[content[0]]["messages"][content[1]] + else: + thumb_image = content[7] + if thumb_image is not None: + if b"\x00\x00\x01\x74\x00\x1A" in thumb_image: + # Add user + added = phone_number_re.search(thumb_image.decode("unicode_escape"))[0] + if added in data: + name_right = data[added]["name"] + else: + name_right = added.split('@')[0] + if content[8] is not None: + if content[8] in data: + name_left = data[content[8]]["name"] + else: + name_left = 
content[8].split('@')[0] + data[content[0]]["messages"][content[1]]["data"] = "{"f"{name_left}"f" added {name_right or 'You'}""}" + else: + data[content[0]]["messages"][content[1]]["data"] = "{"f"Added {name_right or 'You'}""}" + if b"\xac\xed\x00\x05\x74\x00" in thumb_image: + # Changed number + original = content[8].split('@')[0] + changed = thumb_image[7:].decode().split('@')[0] + data[content[0]]["messages"][content[1]]["data"] = "{"f"{original} changed to {changed}""}" + else: + if content[4] is None: + del data[content[0]]["messages"][content[1]] + else: + # Private chat + if content[4] is None and content[7] is None: + del data[content[0]]["messages"][content[1]] + + else: + if content[2] == 1: + if content[5] == 5 and content[6] == 7: + data[content[0]]["messages"][content[1]]["data"] = "{Message deleted}" + else: + if content[9] == "5": + data[content[0]]["messages"][content[1]]["data"] = "{ Location shared: "f"{content[10], content[11]}"" }" + else: + data[content[0]]["messages"][content[1]]["data"] = content[4] + else: + if content[5] == 0 and content[6] == 7: + data[content[0]]["messages"][content[1]]["data"] = "{Message deleted}" + else: + if content[9] == "5": + data[content[0]]["messages"][content[1]]["data"] = "{ Location shared: "f"{content[10], content[11]}"" }" + else: + data[content[0]]["messages"][content[1]]["data"] = content[4] + i += 1 if i % 1000 == 0: print(f"Gathering messages...({i}/{total_row_number})", end="\r") @@ -102,6 +171,23 @@ while content is not None: content = c.fetchone() print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r") +c.execute("""SELECT message_row_id, messages.key_remote_jid, vcard, messages.media_name FROM messages_vcards INNER JOIN messages ON messages_vcards.message_row_id = messages._id ORDER BY messages.key_remote_jid ASC""") +rows = c.fetchall() +total_row_number = len(rows) +print(f"\nGathering vCards...(0/{total_row_number})", end="\r") +base = "WhatsApp/vCards" +for index, row in 
enumerate(rows): + if not os.path.isdir(base): + os.mkdir(base) + file_name = "".join(x for x in row[3] if x.isalnum()) + file_path = f"{base}/{file_name}.vcf" + if not os.path.isfile(file_path): + with open(file_path, "w", encoding="utf-8") as f: + f.write(row[2]) + data[row[1]]["messages"][row[0]]["data"] = row[3] + "{ The vCard file cannot be displayed here, however it should be located at " + file_path + "}" + data[row[1]]["messages"][row[0]]["mime"] = "x-vcard" + print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r") + templateLoader = jinja2.FileSystemLoader(searchpath="./") templateEnv = jinja2.Environment(loader=templateLoader) templateEnv.globals.update(determine_day=determine_day) @@ -135,8 +221,9 @@ for current, i in enumerate(data): name = data[i]["name"] else: name = phone_number - - with open(f"{output_folder}/{file_name}.html", "w", encoding="utf-8") as f: + safe_file_name = '' + safe_file_name = "".join(x for x in file_name if x.isalnum()) + with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f: f.write(template.render(name=name, msgs=data[i]["messages"].values(), my_avatar=None, their_avatar=f"WhatsApp/Avatars/{i}.j")) if current % 10 == 0: print(f"Creating HTML...({current}/{total_row_number})", end="\r") diff --git a/extract_iphone.py b/extract_iphone.py index 489f85d..8c3fc08 100644 --- a/extract_iphone.py +++ b/extract_iphone.py @@ -41,7 +41,7 @@ total_row_number = c.fetchone()[0] apple_time = datetime.timestamp(datetime(2001,1,1)) print(f"Gathering messages...(0/{total_row_number})", end="\r") -c.execute("""SELECT COALESCE(ZFROMJID, ZTOJID), Z_PK, ZISFROMME, ZMESSAGEDATE, ZTEXT FROM ZWAMESSAGE;""") +c.execute("""SELECT COALESCE(ZFROMJID, ZTOJID), ZWAMESSAGE.Z_PK, ZISFROMME, ZMESSAGEDATE, ZTEXT, ZMESSAGETYPE, ZWAGROUPMEMBER.ZMEMBERJID FROM main.ZWAMESSAGE LEFT JOIN main.ZWAGROUPMEMBER ON main.ZWAMESSAGE.ZGROUPMEMBER = main.ZWAGROUPMEMBER.Z_PK;""") i = 0 content = c.fetchone() while content is not None: 
@@ -52,9 +52,51 @@ while content is not None: "from_me": bool(content[2]), "timestamp": ts, "time": datetime.fromtimestamp(ts).strftime("%H:%M"), - "data": content[4], "media": False } + if "-" in content[0] and content[2] == 0: + name = None + if content[6] is not None: + if content[6] in data: + name = data[content[6]]["name"] + if "@" in content[6]: + fallback = content[6].split('@')[0] + else: + fallback = None + else: + fallback = None + data[content[0]]["messages"][content[1]]["sender"] = name or fallback + else: + data[content[0]]["messages"][content[1]]["sender"] = None + if content[5] == 6: + # Metadata + if "-" in content[0]: + # Group + if content[4] is not None: + # Changed name + try: + int(content[4]) + except: + data[content[0]]["messages"][content[1]]["data"] = "{The group name changed to "f"{content[4]}"" }" + else: + del data[content[0]]["messages"][content[1]] + else: + data[content[0]]["messages"][content[1]]["data"] = None + else: + data[content[0]]["messages"][content[1]]["data"] = None + else: + # real message + if content[2] == 1: + if content[5] == 14: + data[content[0]]["messages"][content[1]]["data"] = "{Message deleted}" + else: + data[content[0]]["messages"][content[1]]["data"] = content[4] + else: + if content[5] == 14: + data[content[0]]["messages"][content[1]]["data"] = "{Message deleted}" + else: + data[content[0]]["messages"][content[1]]["data"] = content[4] + i += 1 if i % 1000 == 0: print(f"Gathering messages...({i}/{total_row_number})", end="\r") @@ -100,6 +142,24 @@ while content is not None: content = c.fetchone() print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r") +c.execute("""SELECT DISTINCT ZWAVCARDMENTION.ZMEDIAITEM, ZWAMEDIAITEM.ZMESSAGE, COALESCE(ZWAMESSAGE.ZFROMJID, ZWAMESSAGE.ZTOJID) as _id, ZVCARDNAME, ZVCARDSTRING FROM ZWAVCARDMENTION INNER JOIN ZWAMEDIAITEM ON ZWAVCARDMENTION.ZMEDIAITEM = ZWAMEDIAITEM.Z_PK INNER JOIN ZWAMESSAGE ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK""") +rows = 
c.fetchall() +total_row_number = len(rows) +print(f"\nGathering vCards...(0/{total_row_number})", end="\r") +base = "Message/vCards" +for index, row in enumerate(rows): + if not os.path.isdir(base): + os.mkdir(base) + file_name = "".join(x for x in row[3] if x.isalnum()) + file_path = f"{base}/{file_name[:200]}.vcf" + if not os.path.isfile(file_path): + with open(file_path, "w", encoding="utf-8") as f: + f.write(row[4]) + data[row[2]]["messages"][row[1]]["data"] = row[3] + "{ The vCard file cannot be displayed here, however it should be located at " + file_path + "}" + data[row[2]]["messages"][row[1]]["mime"] = "x-vcard" + data[row[2]]["messages"][row[1]]["media"] = True + print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r") + templateLoader = jinja2.FileSystemLoader(searchpath="./") templateEnv = jinja2.Environment(loader=templateLoader) templateEnv.globals.update(determine_day=determine_day) @@ -133,8 +193,10 @@ for current, i in enumerate(data): name = data[i]["name"] else: name = phone_number - - with open(f"{output_folder}/{file_name}.html", "w", encoding="utf-8") as f: + + safe_file_name = '' + safe_file_name = "".join(x for x in file_name if x.isalnum()) + with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f: f.write(template.render(name=name, msgs=data[i]["messages"].values(), my_avatar=None, their_avatar=f"WhatsApp/Avatars/{i}.j")) if current % 10 == 0: print(f"Creating HTML...({current}/{total_row_number})", end="\r") diff --git a/whatsapp.html b/whatsapp.html index 696eab1..2c7c89e 100644 --- a/whatsapp.html +++ b/whatsapp.html @@ -30,7 +30,6 @@ } img, video { max-width:100%; - width: 70%; } @@ -55,7 +54,7 @@