From b281f9ea60c2b2792c0146b905fe9feb196a150d Mon Sep 17 00:00:00 2001
From: KnugiHK <24708955+KnugiHK@users.noreply.github.com>
Date: Wed, 13 Jan 2021 00:52:01 +0800
Subject: [PATCH] Add support for iPhone

---
 extract_iphone.py       | 142 ++++++++++++++++++++++++++++++++++++++++
 extract_iphone_media.py |  32 +++++++++
 2 files changed, 174 insertions(+)
 create mode 100644 extract_iphone.py
 create mode 100644 extract_iphone_media.py

diff --git a/extract_iphone.py b/extract_iphone.py
new file mode 100644
index 0000000..d9717c7
--- /dev/null
+++ b/extract_iphone.py
@@ -0,0 +1,142 @@
+#!/usr/bin/python3
+"""Export WhatsApp chats from an iPhone backup to HTML pages and JSON."""
+
+import sqlite3
+import sys
+import json
+import jinja2
+import os
+import base64
+import requests
+from datetime import datetime
+from mimetypes import MimeTypes
+
+# Core Data stores dates as seconds since 2001-01-01 00:00:00 UTC (Apple's
+# reference date); adding this offset converts them to Unix time.
+APPLE_EPOCH = 978307200
+
+
+def determine_day(last, current):
+    """Return the date of `current` if its day differs from that of `last`.
+
+    Both arguments are Unix timestamps in seconds; None is returned when
+    they fall on the same local day. Exposed to the HTML template below.
+    """
+    last = datetime.fromtimestamp(last).date()
+    current = datetime.fromtimestamp(current).date()
+    return None if last == current else current
+
+
+data = {}
+
+# Get contacts
+msg = sqlite3.connect("7c7fba66680ef796b916b067077cc246adacf01d")
+c = msg.cursor()
+c.execute("""SELECT count() FROM ZWACHATSESSION""")
+total_row_number = c.fetchone()[0]
+print(f"Gathering contacts...({total_row_number})")
+
+c.execute("""SELECT ZCONTACTJID, ZPARTNERNAME FROM ZWACHATSESSION; """)
+for row in c:
+    data[row[0]] = {"name": row[1], "messages": {}}
+
+# Get message history
+c.execute("""SELECT count() FROM ZWAMESSAGE""")
+total_row_number = c.fetchone()[0]
+print(f"Gathering messages...(0/{total_row_number})", end="\r")
+
+c.execute("""SELECT COALESCE(ZFROMJID, ZTOJID), Z_PK, ZISFROMME, ZSENTDATE, ZTEXT FROM ZWAMESSAGE;""")
+i = 0
+for content in c:
+    if content[0] not in data:
+        data[content[0]] = {"name": None, "messages": {}}
+    timestamp = APPLE_EPOCH + content[3]
+    data[content[0]]["messages"][content[1]] = {
+        "from_me": bool(content[2]),
+        "timestamp": timestamp,
+        "time": datetime.fromtimestamp(timestamp).strftime("%H:%M"),
+        "data": content[4],
+        "media": False
+    }
+    i += 1
+    if i % 1000 == 0:
+        print(f"Gathering messages...({i}/{total_row_number})", end="\r")
+print(f"Gathering messages...({total_row_number}/{total_row_number})", end="\r")
+
+# Get media
+c.execute("""SELECT count() FROM ZWAMEDIAITEM""")
+total_row_number = c.fetchone()[0]
+print(f"\nGathering media...(0/{total_row_number})", end="\r")
+i = 0
+c.execute("""SELECT COALESCE(ZWAMESSAGE.ZFROMJID, ZWAMESSAGE.ZTOJID) as _id, ZMESSAGE, ZMEDIALOCALPATH, ZMEDIAURL, ZVCARDSTRING, ZMEDIAKEY FROM ZWAMEDIAITEM INNER JOIN ZWAMESSAGE ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK WHERE ZMEDIALOCALPATH IS NOT NULL ORDER BY _id ASC""")
+mime = MimeTypes()
+for content in c:
+    message = data[content[0]]["messages"][content[1]]
+    file_path = f"WhatsApp/{content[2]}"
+    message["media"] = True
+    if os.path.isfile(file_path):
+        with open(file_path, "rb") as f:
+            message["data"] = base64.b64encode(f.read()).decode("utf-8")
+        # ZVCARDSTRING is used as the MIME type when present; otherwise
+        # guess from the file name and fall back to JPEG.
+        if content[4] is not None:
+            message["mime"] = content[4]
+        else:
+            message["mime"] = mime.guess_type(file_path)[0] or "image/jpeg"
+    else:
+        # TODO: fetch ZMEDIAURL (content[3]) with requests once the
+        # download can be turned into displayable media.
+        message["data"] = "{The media is missing}"
+        message["mime"] = "media"
+    i += 1
+    if i % 1000 == 0:
+        print(f"Gathering media...({i}/{total_row_number})", end="\r")
+print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
+msg.close()
+
+templateLoader = jinja2.FileSystemLoader(searchpath="./")
+templateEnv = jinja2.Environment(loader=templateLoader)
+templateEnv.globals.update(determine_day=determine_day)
+TEMPLATE_FILE = "whatsapp.html"
+template = templateEnv.get_template(TEMPLATE_FILE)
+
+total_row_number = len(data)
+print(f"\nCreating HTML...(0/{total_row_number})", end="\r")
+
+if len(sys.argv) < 3:
+    output_folder = "temp"
+else:
+    output_folder = sys.argv[2]
+
+os.makedirs(output_folder, exist_ok=True)
+
+for current, i in enumerate(data):
+    if len(data[i]["messages"]) == 0:
+        continue
+    phone_number = i.split('@')[0]
+    if "-" in i:
+        file_name = ""
+    else:
+        file_name = phone_number
+
+    if data[i]["name"] is not None:
+        if file_name != "":
+            file_name += "-"
+        file_name += data[i]["name"].replace("/", "-")
+    if file_name == "":
+        # JID containing "-" (presumably a group chat) without a name:
+        # use the JID prefix so the page is not written as a bare ".html".
+        file_name = phone_number
+    with open(f"{output_folder}/{file_name}.html", "w", encoding="utf-8") as f:
+        f.write(template.render(name=data[i]["name"] if data[i]["name"] is not None else phone_number, msgs=data[i]["messages"].values()))
+    if current % 10 == 0:
+        print(f"Creating HTML...({current}/{total_row_number})", end="\r")
+
+print(f"Creating HTML...({total_row_number}/{total_row_number})", end="\r")
+
+with open("result.json", "w") as f:
+    data = json.dumps(data)
+    print(f"\nWriting JSON file...({int(len(data)/1024/1024)}MB)")
+    f.write(data)
+
+print("Everything is done!")
diff --git a/extract_iphone_media.py b/extract_iphone_media.py
new file mode 100644
index 0000000..0354b61
--- /dev/null
+++ b/extract_iphone_media.py
@@ -0,0 +1,32 @@
+#!/usr/bin/python3
+"""Copy WhatsApp media files out of an iPhone backup into Message/Media."""
+
+import shutil
+import sqlite3
+import os
+import sys
+
+# Backup folder that holds Manifest.db and the hashed file store.
+backup_folder = sys.argv[2]
+
+manifest = sqlite3.connect(f"{backup_folder}/Manifest.db")
+c = manifest.cursor()
+c.execute("""SELECT count() FROM Files WHERE relativePath LIKE 'Message/Media/%'""")
+total_row_number = c.fetchone()[0]
+print(f"Gathering media...(0/{total_row_number})", end="\r")
+c.execute("""SELECT fileID, relativePath, flags FROM Files WHERE relativePath LIKE 'Message/Media/%'""")
+os.makedirs("Message/Media", exist_ok=True)
+i = 0
+for hashes, destination, flags in c:
+    if flags == 2:
+        # Directory entry; tolerate it already existing from a previous run.
+        os.makedirs(destination, exist_ok=True)
+    elif flags == 1:
+        # File entry, stored in the backup as <hash[:2]>/<hash>.
+        os.makedirs(os.path.dirname(destination), exist_ok=True)
+        shutil.copyfile(f"{backup_folder}/{hashes[:2]}/{hashes}", destination)
+    i += 1
+    if i % 100 == 0:
+        print(f"Gathering media...({i}/{total_row_number})", end="\r")
+print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
+manifest.close()