diff --git a/extract.py b/extract.py index 6ed5454..ed7847e 100644 --- a/extract.py +++ b/extract.py @@ -4,6 +4,8 @@ import sqlite3 import sys import json import jinja2 +import os +import base64 from datetime import datetime def determine_day(last, current): @@ -16,19 +18,29 @@ def determine_day(last, current): return current data = {} -wa = sqlite3.connect("wa.db") +# Get contacts +wa = sqlite3.connect("wa.db") c = wa.cursor() -c.execute("""SELECT jid, display_name FROM "main"."wa_contacts"; """) +c.execute("""SELECT count() FROM wa_contacts""") +total_row_number = c.fetchone()[0] +print(f"Gathering contacts...({total_row_number})") + +c.execute("""SELECT jid, display_name FROM wa_contacts; """) row = c.fetchone() while row is not None: data[row[0]] = {"name": row[1], "messages":{}} row = c.fetchone() +# Get message history msg = sqlite3.connect("msgstore.db") c = msg.cursor() +c.execute("""SELECT count() FROM messages""") +total_row_number = c.fetchone()[0] +print(f"Gathering messages...(0/{total_row_number})", end="\r") -c.execute("""SELECT key_remote_jid, _id, key_from_me, timestamp, data FROM "main"."messages"; """) +c.execute("""SELECT key_remote_jid, _id, key_from_me, timestamp, data FROM messages; """) +i = 0 content = c.fetchone() while content is not None: if content[0] not in data: @@ -37,9 +49,34 @@ while content is not None: "from_me": bool(content[2]), "timestamp": content[3]/1000, "time": datetime.fromtimestamp(content[3]/1000).strftime("%H:%M"), - "data": content[4] + "data": content[4], + "media": False } + i += 1 + if i % 1000 == 0: + print(f"Gathering messages...({i}/{total_row_number})", end="\r") content = c.fetchone() +print(f"Gathering messages...({total_row_number}/{total_row_number})", end="\r") +# Get media + +c.execute("""SELECT count() FROM message_media""") +total_row_number = c.fetchone()[0] +print(f"\nGathering media...(0/{total_row_number})", end="\r") +i = 0 +c.execute("""SELECT messages.key_remote_jid, message_row_id, file_path, message_url, mime_type, media_key FROM message_media INNER JOIN messages ON message_media.message_row_id = messages._id ORDER BY messages.key_remote_jid ASC""") +content = c.fetchone() +while content is not None: + file_path = f"WhatsApp/{content[2]}" + if os.path.isfile(file_path): + with open(file_path, "rb") as f: + data[content[0]]["messages"][content[1]]["data"] = base64.b64encode(f.read()).decode("utf-8") + data[content[0]]["messages"][content[1]]["media"] = True + data[content[0]]["messages"][content[1]]["mime"] = content[4] + i += 1 + if i % 1000 == 0: + print(f"Gathering media...({i}/{total_row_number})", end="\r") + content = c.fetchone() +print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r") templateLoader = jinja2.FileSystemLoader(searchpath="./") templateEnv = jinja2.Environment(loader=templateLoader) @@ -47,7 +84,10 @@ templateEnv.globals.update(determine_day=determine_day) TEMPLATE_FILE = "whatsapp.html" template = templateEnv.get_template(TEMPLATE_FILE) -for i in data: +total_row_number = len(data) +print(f"\nCreating HTML...(0/{total_row_number})", end="\r") + +for current, i in enumerate(data): if len(data[i]["messages"]) == 0: continue phone_number = i.split('@')[0] @@ -63,6 +103,12 @@ for i in data: with open(f"temp/{file_name}.html", "w", encoding="utf-8") as f: f.write(template.render(name=data[i]["name"] if data[i]["name"] is not None else phone_number, msgs=data[i]["messages"].values())) + if current % 10 == 0: + print(f"Creating HTML...({current}/{total_row_number})", end="\r") +print(f"\nCreating HTML...({total_row_number}/{total_row_number})", end="\r") +print("\nWriting JSON file...") with open("result.json", "w") as f: - f.write(json.dumps(data)) \ No newline at end of file + f.write(json.dumps(data)) + +print("Everything is done!") diff --git a/whatsapp.html b/whatsapp.html index 687ca7e..b02c6e8 100644 --- a/whatsapp.html +++ b/whatsapp.html @@ -48,7 +48,13 @@