diff --git a/extract.py b/extract.py index 997b3ea..2228817 100644 --- a/extract.py +++ b/extract.py @@ -21,12 +21,9 @@ def determine_day(last, current): else: return current -data = {} - -# Get contacts -if os.path.isfile("wa.db"): - wa = sqlite3.connect("wa.db") - c = wa.cursor() +def contacts(db, data): + # Get contacts + c = db.cursor() c.execute("""SELECT count() FROM wa_contacts""") total_row_number = c.fetchone()[0] print(f"Gathering contacts...({total_row_number})") @@ -36,225 +33,271 @@ if os.path.isfile("wa.db"): while row is not None: data[row[0]] = {"name": row[1], "messages":{}} row = c.fetchone() - wa.close() -# Get message history -msg = sqlite3.connect("msgstore.db") -c = msg.cursor() -c.execute("""SELECT count() FROM messages""") -total_row_number = c.fetchone()[0] -print(f"Gathering messages...(0/{total_row_number})", end="\r") +def messages(db, data): + # Get message history + c = db.cursor() + c.execute("""SELECT count() FROM messages""") + total_row_number = c.fetchone()[0] + print(f"Gathering messages...(0/{total_row_number})", end="\r") -phone_number_re = re.compile(r"[0-9]+@s.whatsapp.net") -c.execute("""SELECT messages.key_remote_jid, messages._id, messages.key_from_me, messages.timestamp, messages.data, messages.status, messages.edit_version, messages.thumb_image, messages.remote_resource, messages.media_wa_type, messages.latitude, messages.longitude, messages_quotes.key_id as quoted, messages.key_id, messages_quotes.data, messages.media_caption FROM messages LEFT JOIN messages_quotes ON messages.quoted_row_id = messages_quotes._id; """) -i = 0 -content = c.fetchone() -while content is not None: - if content[0] not in data: - data[content[0]] = {"name": None, "messages": {}} - data[content[0]]["messages"][content[1]] = { - "from_me": bool(content[2]), - "timestamp": content[3]/1000, - "time": datetime.fromtimestamp(content[3]/1000).strftime("%H:%M"), - "media": False, - "key_id": content[13] - } - if "-" in content[0] and content[2] == 0: 
- name = None - if content[8] in data: - name = data[content[8]]["name"] - if "@" in content[8]: - fallback = content[8].split('@')[0] + phone_number_re = re.compile(r"[0-9]+@s.whatsapp.net") + c.execute("""SELECT messages.key_remote_jid, messages._id, messages.key_from_me, messages.timestamp, messages.data, messages.status, messages.edit_version, messages.thumb_image, messages.remote_resource, messages.media_wa_type, messages.latitude, messages.longitude, messages_quotes.key_id as quoted, messages.key_id, messages_quotes.data, messages.media_caption FROM messages LEFT JOIN messages_quotes ON messages.quoted_row_id = messages_quotes._id; """) + i = 0 + content = c.fetchone() + while content is not None: + if content[0] not in data: + data[content[0]] = {"name": None, "messages": {}} + data[content[0]]["messages"][content[1]] = { + "from_me": bool(content[2]), + "timestamp": content[3]/1000, + "time": datetime.fromtimestamp(content[3]/1000).strftime("%H:%M"), + "media": False, + "key_id": content[13] + } + if "-" in content[0] and content[2] == 0: + name = None + if content[8] in data: + name = data[content[8]]["name"] + if "@" in content[8]: + fallback = content[8].split('@')[0] + else: + fallback = None else: fallback = None + + data[content[0]]["messages"][content[1]]["sender"] = name or fallback else: - fallback = None + data[content[0]]["messages"][content[1]]["sender"] = None - data[content[0]]["messages"][content[1]]["sender"] = name or fallback - else: - data[content[0]]["messages"][content[1]]["sender"] = None - - if content[12] is not None: - data[content[0]]["messages"][content[1]]["reply"] = content[12] - data[content[0]]["messages"][content[1]]["quoted_data"] = content[14] - else: - data[content[0]]["messages"][content[1]]["reply"] = None - - if content[15] is not None: - data[content[0]]["messages"][content[1]]["caption"] = content[15] - else: - data[content[0]]["messages"][content[1]]["caption"] = None - - if content[5] == 6: - if "-" in content[0]: - 
# Is Group - if content[4] is not None: - try: - int(content[4]) - except: - data[content[0]]["messages"][content[1]]["data"] = "{The group name changed to "f"{content[4]}"" }" - else: - del data[content[0]]["messages"][content[1]] - else: - thumb_image = content[7] - if thumb_image is not None: - if b"\x00\x00\x01\x74\x00\x1A" in thumb_image: - # Add user - added = phone_number_re.search(thumb_image.decode("unicode_escape"))[0] - if added in data: - name_right = data[added]["name"] - else: - name_right = added.split('@')[0] - if content[8] is not None: - if content[8] in data: - name_left = data[content[8]]["name"] - else: - name_left = content[8].split('@')[0] - data[content[0]]["messages"][content[1]]["data"] = "{"f"{name_left}"f" added {name_right or 'You'}""}" - else: - data[content[0]]["messages"][content[1]]["data"] = "{"f"Added {name_right or 'You'}""}" - if b"\xac\xed\x00\x05\x74\x00" in thumb_image: - # Changed number - original = content[8].split('@')[0] - changed = thumb_image[7:].decode().split('@')[0] - data[content[0]]["messages"][content[1]]["data"] = "{"f"{original} changed to {changed}""}" - else: - if content[4] is None: + if content[12] is not None: + data[content[0]]["messages"][content[1]]["reply"] = content[12] + data[content[0]]["messages"][content[1]]["quoted_data"] = content[14] + else: + data[content[0]]["messages"][content[1]]["reply"] = None + + if content[15] is not None: + data[content[0]]["messages"][content[1]]["caption"] = content[15] + else: + data[content[0]]["messages"][content[1]]["caption"] = None + + if content[5] == 6: + if "-" in content[0]: + # Is Group + if content[4] is not None: + try: + int(content[4]) + except: + data[content[0]]["messages"][content[1]]["data"] = "{The group name changed to "f"{content[4]}"" }" + else: del data[content[0]]["messages"][content[1]] - else: - # Private chat - if content[4] is None and content[7] is None: - del data[content[0]]["messages"][content[1]] - - else: - if content[2] == 1: - if 
content[5] == 5 and content[6] == 7: - data[content[0]]["messages"][content[1]]["data"] = "{Message deleted}" - else: - if content[9] == "5": - data[content[0]]["messages"][content[1]]["data"] = "{ Location shared: "f"{content[10], content[11]}"" }" else: - data[content[0]]["messages"][content[1]]["data"] = content[4] - else: - if content[5] == 0 and content[6] == 7: - data[content[0]]["messages"][content[1]]["data"] = "{Message deleted}" + thumb_image = content[7] + if thumb_image is not None: + if b"\x00\x00\x01\x74\x00\x1A" in thumb_image: + # Add user + added = phone_number_re.search(thumb_image.decode("unicode_escape"))[0] + if added in data: + name_right = data[added]["name"] + else: + name_right = added.split('@')[0] + if content[8] is not None: + if content[8] in data: + name_left = data[content[8]]["name"] + else: + name_left = content[8].split('@')[0] + data[content[0]]["messages"][content[1]]["data"] = "{"f"{name_left}"f" added {name_right or 'You'}""}" + else: + data[content[0]]["messages"][content[1]]["data"] = "{"f"Added {name_right or 'You'}""}" + if b"\xac\xed\x00\x05\x74\x00" in thumb_image: + # Changed number + original = content[8].split('@')[0] + changed = thumb_image[7:].decode().split('@')[0] + data[content[0]]["messages"][content[1]]["data"] = "{"f"{original} changed to {changed}""}" + else: + if content[4] is None: + del data[content[0]]["messages"][content[1]] else: - if content[9] == "5": - data[content[0]]["messages"][content[1]]["data"] = "{ Location shared: "f"{content[10], content[11]}"" }" - else: - data[content[0]]["messages"][content[1]]["data"] = content[4] + # Private chat + if content[4] is None and content[7] is None: + del data[content[0]]["messages"][content[1]] - i += 1 - if i % 1000 == 0: - print(f"Gathering messages...({i}/{total_row_number})", end="\r") - content = c.fetchone() -print(f"Gathering messages...({total_row_number}/{total_row_number})", end="\r") -# Get media - -c.execute("""SELECT count() FROM 
message_media""") -total_row_number = c.fetchone()[0] -print(f"\nGathering media...(0/{total_row_number})", end="\r") -i = 0 -c.execute("""SELECT messages.key_remote_jid, message_row_id, file_path, message_url, mime_type, media_key FROM message_media INNER JOIN messages ON message_media.message_row_id = messages._id ORDER BY messages.key_remote_jid ASC""") -content = c.fetchone() -mime = MimeTypes() -while content is not None: - file_path = f"WhatsApp/{content[2]}" - data[content[0]]["messages"][content[1]]["media"] = True - if os.path.isfile(file_path): - data[content[0]]["messages"][content[1]]["data"] = file_path - if content[4] is None: - guess = mime.guess_type(file_path)[0] - if guess is not None: - data[content[0]]["messages"][content[1]]["mime"] = guess - else: - data[content[0]]["messages"][content[1]]["mime"] = "data/data" else: - data[content[0]]["messages"][content[1]]["mime"] = content[4] - else: - # if "https://mmg" in content[4]: - # try: - # r = requests.get(content[3]) - # if r.status_code != 200: - # raise RuntimeError() - # except: - # data[content[0]]["messages"][content[1]]["data"] = "{The media is missing}" - # data[content[0]]["messages"][content[1]]["media"] = True - # data[content[0]]["messages"][content[1]]["mime"] = "media" - # else: - data[content[0]]["messages"][content[1]]["data"] = "{The media is missing}" - data[content[0]]["messages"][content[1]]["mime"] = "media" - i += 1 - if i % 100 == 0: - print(f"Gathering media...({i}/{total_row_number})", end="\r") + if content[2] == 1: + if content[5] == 5 and content[6] == 7: + data[content[0]]["messages"][content[1]]["data"] = "{Message deleted}" + else: + if content[9] == "5": + data[content[0]]["messages"][content[1]]["data"] = "{ Location shared: "f"{content[10], content[11]}"" }" + else: + data[content[0]]["messages"][content[1]]["data"] = content[4] + else: + if content[5] == 0 and content[6] == 7: + data[content[0]]["messages"][content[1]]["data"] = "{Message deleted}" + else: + if 
content[9] == "5": + data[content[0]]["messages"][content[1]]["data"] = "{ Location shared: "f"{content[10], content[11]}"" }" + else: + data[content[0]]["messages"][content[1]]["data"] = content[4] + + i += 1 + if i % 1000 == 0: + print(f"Gathering messages...({i}/{total_row_number})", end="\r") + content = c.fetchone() + print(f"Gathering messages...({total_row_number}/{total_row_number})", end="\r") + +def media(db, data, media_folder): + # Get media + c = db.cursor() + c.execute("""SELECT count() FROM message_media""") + total_row_number = c.fetchone()[0] + print(f"\nGathering media...(0/{total_row_number})", end="\r") + i = 0 + c.execute("""SELECT messages.key_remote_jid, message_row_id, file_path, message_url, mime_type, media_key FROM message_media INNER JOIN messages ON message_media.message_row_id = messages._id ORDER BY messages.key_remote_jid ASC""") content = c.fetchone() -print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r") + mime = MimeTypes() + while content is not None: + file_path = f"{media_folder}/{content[2]}" + data[content[0]]["messages"][content[1]]["media"] = True + if os.path.isfile(file_path): + data[content[0]]["messages"][content[1]]["data"] = file_path + if content[4] is None: + guess = mime.guess_type(file_path)[0] + if guess is not None: + data[content[0]]["messages"][content[1]]["mime"] = guess + else: + data[content[0]]["messages"][content[1]]["mime"] = "data/data" + else: + data[content[0]]["messages"][content[1]]["mime"] = content[4] + else: + # if "https://mmg" in content[4]: + # try: + # r = requests.get(content[3]) + # if r.status_code != 200: + # raise RuntimeError() + # except: + # data[content[0]]["messages"][content[1]]["data"] = "{The media is missing}" + # data[content[0]]["messages"][content[1]]["media"] = True + # data[content[0]]["messages"][content[1]]["mime"] = "media" + # else: + data[content[0]]["messages"][content[1]]["data"] = "{The media is missing}" + 
def vcard(db, data, media_folder="WhatsApp"):
    """Export shared vCards to ``.vcf`` files and annotate their messages.

    Args:
        db: Open ``sqlite3`` connection to the message database. The
            ``messages_vcards`` and ``messages`` tables are queried.
        data: Chat dictionary keyed by remote JID. For every vCard row the
            entry ``data[jid]["messages"][row_id]`` must already exist; its
            ``"data"`` and ``"mime"`` keys are overwritten.
        media_folder: Folder under which the ``vCards`` directory is created.
            Defaults to ``"WhatsApp"``, the location that was hard-coded
            before this parameter existed.

    Raises:
        KeyError: If a vCard row refers to a message missing from ``data``.
    """
    c = db.cursor()
    c.execute("""SELECT message_row_id, messages.key_remote_jid, vcard, messages.media_name FROM messages_vcards INNER JOIN messages ON messages_vcards.message_row_id = messages._id ORDER BY messages.key_remote_jid ASC""")
    rows = c.fetchall()
    total_row_number = len(rows)
    print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
    base = f"{media_folder}/vCards"
    if rows:
        # Create the directory once, parents included. The previous per-row
        # os.mkdir raised FileNotFoundError when the media folder was absent.
        os.makedirs(base, exist_ok=True)
    for index, (row_id, jid, vcard_text, media_name) in enumerate(rows, start=1):
        # Both columns may be NULL in the database; treat NULL as empty text
        # instead of failing on None.
        display_name = media_name or ""
        file_name = "".join(x for x in display_name if x.isalnum())
        # Cap the name length so long contact names cannot exceed common
        # file-system limits.
        file_path = f"{base}/{file_name[:200]}.vcf"
        if not os.path.isfile(file_path):
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(vcard_text or "")
        message = data[jid]["messages"][row_id]
        message["data"] = display_name + "{ The vCard file cannot be displayed here, however it should be located at " + file_path + "}"
        message["mime"] = "x-vcard"
        print(f"Gathering vCards...({index}/{total_row_number})", end="\r")
if __name__ == "__main__":
    # Command-line entry point: read the contact and message databases,
    # render one HTML file per chat, move the media folder next to the HTML
    # output and dump the collected data to result.json.
    from contextlib import closing
    from optparse import OptionParser

    parser = OptionParser(usage="%prog [options] [msg_db] [output_folder]")
    parser.add_option(
        "-w",
        "--wa",
        dest="wa",
        default="wa.db",
        help="Path to contact database")
    parser.add_option(
        "-m",
        "--media",
        dest="media",
        default="WhatsApp",
        help="Path to WhatsApp media folder"
    )
    # TODO: add a -t/--template option for the path to the HTML template.
    (options, args) = parser.parse_args()
    msg_db = "msgstore.db"
    output_folder = "temp"
    contact_db = options.wa
    media_folder = options.media

    if len(args) > 2:
        parser.error("expected at most two positional arguments")
    if len(args) >= 1:
        msg_db = args[0]
    if len(args) == 2:
        output_folder = args[1]

    # Fail early: without the message database nothing can be exported, and
    # continuing would still move the media folder and write an empty JSON.
    if not os.path.isfile(msg_db):
        parser.error(f"message database not found: {msg_db}")

    data = {}

    # The contact database is optional; chats then fall back to phone numbers.
    # sqlite3's own context manager only wraps a transaction and leaves the
    # connection open, so closing() is used to release the file handle.
    if os.path.isfile(contact_db):
        with closing(sqlite3.connect(contact_db)) as db:
            contacts(db, data)

    with closing(sqlite3.connect(msg_db)) as db:
        messages(db, data)
        media(db, data, media_folder)
        vcard(db, data)
    create_html(data, output_folder)

    # shutil.move puts the folder at <output_folder>/<basename of the media
    # folder>, so that is the path to test, not a fixed "WhatsApp" name.
    moved_media = os.path.join(
        output_folder, os.path.basename(os.path.normpath(media_folder)))
    if os.path.isdir(media_folder) and not os.path.isdir(moved_media):
        shutil.move(media_folder, f"{output_folder}/")

    with open("result.json", "w") as f:
        serialized = json.dumps(data)
        print(f"\nWriting JSON file...({int(len(serialized)/1024/1024)}MB)")
        f.write(serialized)

    print("Everything is done!")
messages...(0/{total_row_number})", end="\r") -c.execute("""SELECT COALESCE(ZFROMJID, ZTOJID), ZWAMESSAGE.Z_PK, ZISFROMME, ZMESSAGEDATE, ZTEXT, ZMESSAGETYPE, ZWAGROUPMEMBER.ZMEMBERJID FROM main.ZWAMESSAGE LEFT JOIN main.ZWAGROUPMEMBER ON main.ZWAMESSAGE.ZGROUPMEMBER = main.ZWAGROUPMEMBER.Z_PK;""") -i = 0 -content = c.fetchone() -while content is not None: - if content[0] not in data: - data[content[0]] = {"name": None, "messages": {}} - ts = apple_time + content[3] - data[content[0]]["messages"][content[1]] = { - "from_me": bool(content[2]), - "timestamp": ts, - "time": datetime.fromtimestamp(ts).strftime("%H:%M"), - "media": False, - "reply": None, - "caption": None - } - if "-" in content[0] and content[2] == 0: - name = None - if content[6] is not None: - if content[6] in data: - name = data[content[6]]["name"] - if "@" in content[6]: - fallback = content[6].split('@')[0] + c.execute("""SELECT COALESCE(ZFROMJID, ZTOJID), ZWAMESSAGE.Z_PK, ZISFROMME, ZMESSAGEDATE, ZTEXT, ZMESSAGETYPE, ZWAGROUPMEMBER.ZMEMBERJID FROM main.ZWAMESSAGE LEFT JOIN main.ZWAGROUPMEMBER ON main.ZWAMESSAGE.ZGROUPMEMBER = main.ZWAGROUPMEMBER.Z_PK;""") + i = 0 + content = c.fetchone() + while content is not None: + if content[0] not in data: + data[content[0]] = {"name": None, "messages": {}} + ts = APPLE_TIME + content[3] + data[content[0]]["messages"][content[1]] = { + "from_me": bool(content[2]), + "timestamp": ts, + "time": datetime.fromtimestamp(ts).strftime("%H:%M"), + "media": False, + "reply": None, + "caption": None + } + if "-" in content[0] and content[2] == 0: + name = None + if content[6] is not None: + if content[6] in data: + name = data[content[6]]["name"] + if "@" in content[6]: + fallback = content[6].split('@')[0] + else: + fallback = None else: fallback = None + data[content[0]]["messages"][content[1]]["sender"] = name or fallback else: - fallback = None - data[content[0]]["messages"][content[1]]["sender"] = name or fallback - else: - 
def media(db, data, media_folder):
    """Attach media file information to the messages already stored in *data*.

    Args:
        db: Open ``sqlite3`` connection to the chat database. The
            ``ZWAMEDIAITEM`` and ``ZWAMESSAGE`` tables are queried.
        data: Chat dictionary keyed by JID. For every media row the entry
            ``data[jid]["messages"][message_id]`` must already exist; its
            ``"media"``, ``"data"``, ``"mime"`` and (when a title is present)
            ``"caption"`` keys are written.
        media_folder: Folder that the ``ZMEDIALOCALPATH`` values are relative
            to.

    Raises:
        KeyError: If a media row refers to a message missing from ``data``.
    """
    c = db.cursor()
    # Get media
    c.execute("""SELECT count() FROM ZWAMEDIAITEM""")
    total_row_number = c.fetchone()[0]
    print(f"\nGathering media...(0/{total_row_number})", end="\r")
    c.execute("""SELECT COALESCE(ZWAMESSAGE.ZFROMJID, ZWAMESSAGE.ZTOJID) as _id, ZMESSAGE, ZMEDIALOCALPATH, ZMEDIAURL, ZVCARDSTRING, ZMEDIAKEY, ZTITLE FROM ZWAMEDIAITEM INNER JOIN ZWAMESSAGE ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK WHERE ZMEDIALOCALPATH IS NOT NULL ORDER BY _id ASC""")
    mime = MimeTypes()
    for i, row in enumerate(c, start=1):
        jid, message_id, local_path, _url, stored_mime, _key, title = row
        # Honour the caller-supplied folder; the path used to be hard-coded
        # to "Message/", which made the parameter a no-op.
        file_path = f"{media_folder}/{local_path}"
        message = data[jid]["messages"][message_id]
        message["media"] = True

        if os.path.isfile(file_path):
            message["data"] = file_path
            if stored_mime is None:
                # NOTE(review): ZVCARDSTRING is used as the MIME type here,
                # as in the original code - confirm against the schema.
                guess = mime.guess_type(file_path)[0]
                message["mime"] = guess if guess is not None else "data/data"
            else:
                message["mime"] = stored_mime
        else:
            # The file is not on disk; no download from ZMEDIAURL is tried.
            message["data"] = "{The media is missing}"
            message["mime"] = "media"
        if title is not None:
            message["caption"] = title
        if i % 100 == 0:
            print(f"Gathering media...({i}/{total_row_number})", end="\r")
    print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
def vcard(db, data, media_folder="Message"):
    """Export shared vCards to ``.vcf`` files and annotate their messages.

    Args:
        db: Open ``sqlite3`` connection to the chat database. The
            ``ZWAVCARDMENTION``, ``ZWAMEDIAITEM`` and ``ZWAMESSAGE`` tables
            are queried.
        data: Chat dictionary keyed by JID. For every vCard row the entry
            ``data[jid]["messages"][message_id]`` must already exist; its
            ``"data"``, ``"mime"`` and ``"media"`` keys are overwritten.
        media_folder: Folder under which the ``vCards`` directory is created.
            Defaults to ``"Message"``, the location that was hard-coded
            before this parameter existed.

    Raises:
        KeyError: If a vCard row refers to a message missing from ``data``.
    """
    c = db.cursor()
    c.execute("""SELECT DISTINCT ZWAVCARDMENTION.ZMEDIAITEM, ZWAMEDIAITEM.ZMESSAGE, COALESCE(ZWAMESSAGE.ZFROMJID, ZWAMESSAGE.ZTOJID) as _id, ZVCARDNAME, ZVCARDSTRING FROM ZWAVCARDMENTION INNER JOIN ZWAMEDIAITEM ON ZWAVCARDMENTION.ZMEDIAITEM = ZWAMEDIAITEM.Z_PK INNER JOIN ZWAMESSAGE ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK""")
    rows = c.fetchall()
    total_row_number = len(rows)
    print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
    base = f"{media_folder}/vCards"
    if rows:
        # Create the directory once, parents included. The previous per-row
        # os.mkdir raised FileNotFoundError when the media folder was absent.
        os.makedirs(base, exist_ok=True)
    for index, row in enumerate(rows, start=1):
        _media_item, message_id, jid, card_name, card_text = row
        # Both columns may be NULL in the database; treat NULL as empty text
        # instead of failing on None.
        display_name = card_name or ""
        file_name = "".join(x for x in display_name if x.isalnum())
        file_path = f"{base}/{file_name[:200]}.vcf"
        if not os.path.isfile(file_path):
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(card_text or "")
        message = data[jid]["messages"][message_id]
        message["data"] = display_name + "{ The vCard file cannot be displayed here, however it should be located at " + file_path + "}"
        message["mime"] = "x-vcard"
        message["media"] = True
        print(f"Gathering vCards...({index}/{total_row_number})", end="\r")
def create_html(data, output_folder):
    """Render one HTML file per non-empty chat into *output_folder*.

    The ``whatsapp.html`` template is loaded from the current working
    directory and is given ``determine_day`` as a template global.

    Args:
        data: Chat dictionary keyed by JID; each value has a ``"name"``
            (string or ``None``) and a ``"messages"`` dictionary. Chats
            without messages are skipped.
        output_folder: Destination directory; created if it does not exist.
    """
    template_env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(searchpath="./"))
    template_env.globals.update(determine_day=determine_day)
    template = template_env.get_template("whatsapp.html")

    total_row_number = len(data)
    print(f"\nCreating HTML...(0/{total_row_number})", end="\r")

    # makedirs also handles nested output paths and an existing directory.
    os.makedirs(output_folder, exist_ok=True)

    for current, (jid, chat) in enumerate(data.items()):
        if not chat["messages"]:
            continue
        phone_number = jid.split('@')[0]
        # JIDs containing "-" are treated as group chats, which are named
        # after the chat name alone rather than a phone number.
        file_name = "" if "-" in jid else phone_number

        if chat["name"] is not None:
            if file_name != "":
                file_name += "-"
            file_name += chat["name"].replace("/", "-")
            name = chat["name"]
        else:
            name = phone_number

        safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
        if not safe_file_name.strip():
            # Unnamed groups, or names made only of stripped characters,
            # would all be written to the same ".html" file and overwrite
            # each other; fall back to the JID prefix to keep them apart.
            safe_file_name = "".join(
                x for x in phone_number if x.isalnum() or x in "- ") or "unknown"
        with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
            f.write(template.render(name=name, msgs=chat["messages"].values(), my_avatar=None, their_avatar=f"WhatsApp/Avatars/{jid}.j"))
        if current % 10 == 0:
            print(f"Creating HTML...({current}/{total_row_number})", end="\r")

    print(f"Creating HTML...({total_row_number}/{total_row_number})", end="\r")
if __name__ == "__main__":
    # Command-line entry point: read the chat database, render one HTML file
    # per chat, move the media folder next to the HTML output and dump the
    # collected data to result.json.
    from contextlib import closing
    from optparse import OptionParser

    parser = OptionParser(usage="%prog [options] [msg_db] [output_folder]")
    # NOTE: -w/--wa is still accepted so existing command lines keep working,
    # but nothing in this block reads it.
    parser.add_option(
        "-w",
        "--wa",
        dest="wa",
        default="wa.db",
        help="Path to contact database")
    parser.add_option(
        "-m",
        "--media",
        dest="media",
        default="Message",
        help="Path to WhatsApp media folder"
    )
    # TODO: add a -t/--template option for the path to the HTML template.
    (options, args) = parser.parse_args()
    msg_db = "7c7fba66680ef796b916b067077cc246adacf01d"
    output_folder = "temp"
    media_folder = options.media

    if len(args) > 2:
        parser.error("expected at most two positional arguments")
    if len(args) >= 1:
        msg_db = args[0]
    if len(args) == 2:
        output_folder = args[1]

    # Fail early: without the message database nothing can be exported, and
    # continuing would still move the media folder and write an empty JSON.
    if not os.path.isfile(msg_db):
        parser.error(f"message database not found: {msg_db}")

    data = {}

    # sqlite3's own context manager only wraps a transaction and leaves the
    # connection open, so closing() is used to release the file handle.
    with closing(sqlite3.connect(msg_db)) as db:
        messages(db, data)
        media(db, data, media_folder)
        vcard(db, data)
    create_html(data, output_folder)

    # shutil.move puts the folder at <output_folder>/<basename of the media
    # folder>. The old guard tested a "WhatsApp" folder, which this script
    # never creates, so a second run attempted the move again.
    moved_media = os.path.join(
        output_folder, os.path.basename(os.path.normpath(media_folder)))
    if os.path.isdir(media_folder) and not os.path.isdir(moved_media):
        shutil.move(media_folder, f"{output_folder}/")

    with open("result.json", "w") as f:
        serialized = json.dumps(data)
        print(f"\nWriting JSON file...({int(len(serialized)/1024/1024)}MB)")
        f.write(serialized)

    print("Everything is done!")