Refine code to use the data model

This commit is contained in:
KnugiHK
2023-06-08 17:51:57 +08:00
parent 0e802f4554
commit dbdfdaedcf

View File

@@ -9,6 +9,7 @@ import shutil
from pathlib import Path from pathlib import Path
from datetime import datetime from datetime import datetime
from mimetypes import MimeTypes from mimetypes import MimeTypes
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, APPLE_TIME from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, APPLE_TIME
@@ -20,17 +21,17 @@ def messages(db, data):
print(f"Gathering contacts...({total_row_number})") print(f"Gathering contacts...({total_row_number})")
c.execute("""SELECT ZCONTACTJID, ZPARTNERNAME FROM ZWACHATSESSION; """) c.execute("""SELECT ZCONTACTJID, ZPARTNERNAME FROM ZWACHATSESSION; """)
row = c.fetchone() content = c.fetchone()
while row is not None: while content is not None:
data[row[0]] = {"name": row[1], "messages": {}} data[content["ZCONTACTJID"]] = ChatStore(content["ZPARTNERNAME"])
row = c.fetchone() content = c.fetchone()
# Get message history # Get message history
c.execute("""SELECT count() FROM ZWAMESSAGE""") c.execute("""SELECT count() FROM ZWAMESSAGE""")
total_row_number = c.fetchone()[0] total_row_number = c.fetchone()[0]
print(f"Gathering messages...(0/{total_row_number})", end="\r") print(f"Gathering messages...(0/{total_row_number})", end="\r")
c.execute("""SELECT COALESCE(ZFROMJID, ZTOJID), c.execute("""SELECT COALESCE(ZFROMJID, ZTOJID) as _id,
ZWAMESSAGE.Z_PK, ZWAMESSAGE.Z_PK,
ZISFROMME, ZISFROMME,
ZMESSAGEDATE, ZMESSAGEDATE,
@@ -47,82 +48,79 @@ def messages(db, data):
i = 0 i = 0
content = c.fetchone() content = c.fetchone()
while content is not None: while content is not None:
if content[0] not in data: _id = content["_id"]
data[content[0]] = {"name": None, "messages": {}} Z_PK = content["Z_PK"]
ts = APPLE_TIME + content[3] if _id not in data:
data[content[0]]["messages"][content[1]] = { data[_id] = ChatStore()
"from_me": bool(content[2]), ts = APPLE_TIME + content["ZMESSAGEDATE"]
"timestamp": ts, data[_id].add_message(Z_PK, Message(
"time": datetime.fromtimestamp(ts).strftime("%H:%M"), from_me=content["ZISFROMME"],
"media": False, timestamp=ts,
"reply": None, time=ts, # Could be bug
"caption": None, key_id=content["ZSTANZAID"][:17],
"meta": False, ))
"data": None, if "-" in _id and content["ZISFROMME"] == 0:
"key_id": content["ZSTANZAID"][:17]
}
if "-" in content[0] and content[2] == 0:
name = None name = None
if content[6] is not None: if content["ZMEMBERJID"] is not None:
if content[6] in data: if content["ZMEMBERJID"] in data:
name = data[content[6]]["name"] name = data[content["ZMEMBERJID"]].name
if "@" in content[6]: if "@" in content["ZMEMBERJID"]:
fallback = content[6].split('@')[0] fallback = content["ZMEMBERJID"].split('@')[0]
else: else:
fallback = None fallback = None
else: else:
fallback = None fallback = None
data[content[0]]["messages"][content[1]]["sender"] = name or fallback data[_id].messages[Z_PK].sender = name or fallback
else: else:
data[content[0]]["messages"][content[1]]["sender"] = None data[_id].messages[Z_PK].sender = None
if content[5] == 6: if content["ZMESSAGETYPE"] == 6:
# Metadata # Metadata
if "-" in content[0]: if "-" in _id:
# Group # Group
if content[4] is not None: if content["ZTEXT"] is not None:
# Changed name # Changed name
try: try:
int(content[4]) int(content["ZTEXT"])
except ValueError: except ValueError:
msg = f"The group name changed to {content[4]}" msg = f"The group name changed to {content['ZTEXT']}"
data[content[0]]["messages"][content[1]]["data"] = msg data[_id].messages[Z_PK].data = msg
data[content[0]]["messages"][content[1]]["meta"] = True data[_id].messages[Z_PK].meta = True
else: else:
del data[content[0]]["messages"][content[1]] del data[_id].messages[Z_PK]
else: else:
data[content[0]]["messages"][content[1]]["data"] = None data[_id].messages[Z_PK].data = None
else: else:
data[content[0]]["messages"][content[1]]["data"] = None data[_id].messages[Z_PK].data = None
else: else:
# real message # real message
if content["ZMETADATA"] is not None and content["ZMETADATA"].startswith(b"\x2a\x14"): if content["ZMETADATA"] is not None and content["ZMETADATA"].startswith(b"\x2a\x14"):
quoted = content["ZMETADATA"][2:19] quoted = content["ZMETADATA"][2:19]
data[content[0]]["messages"][content[1]]["reply"] = quoted.decode() data[_id].messages[Z_PK].reply = quoted.decode()
data[content[0]]["messages"][content[1]]["quoted_data"] = None # TODO data[_id].messages[Z_PK].quoted_data = None # TODO
if content[2] == 1: if content["ZISFROMME"] == 1:
if content[5] == 14: if content["ZMESSAGETYPE"] == 14:
msg = "Message deleted" msg = "Message deleted"
data[content[0]]["messages"][content[1]]["meta"] = True data[_id].messages[Z_PK].meta = True
else: else:
msg = content[4] msg = content["ZTEXT"]
if msg is not None: if msg is not None:
if "\r\n" in msg: if "\r\n" in msg:
msg = msg.replace("\r\n", "<br>") msg = msg.replace("\r\n", "<br>")
if "\n" in msg: if "\n" in msg:
msg = msg.replace("\n", "<br>") msg = msg.replace("\n", "<br>")
else: else:
if content[5] == 14: if content["ZMESSAGETYPE"] == 14:
msg = "Message deleted" msg = "Message deleted"
data[content[0]]["messages"][content[1]]["meta"] = True data[_id].messages[Z_PK].meta = True
else: else:
msg = content[4] msg = content["ZTEXT"]
if msg is not None: if msg is not None:
if "\r\n" in msg: if "\r\n" in msg:
msg = msg.replace("\r\n", "<br>") msg = msg.replace("\r\n", "<br>")
if "\n" in msg: if "\n" in msg:
msg = msg.replace("\n", "<br>") msg = msg.replace("\n", "<br>")
data[content[0]]["messages"][content[1]]["data"] = msg data[_id].messages[Z_PK].data = msg
i += 1 i += 1
if i % 1000 == 0: if i % 1000 == 0:
print(f"Gathering messages...({i}/{total_row_number})", end="\r") print(f"Gathering messages...({i}/{total_row_number})", end="\r")
@@ -153,34 +151,36 @@ def media(db, data, media_folder):
content = c.fetchone() content = c.fetchone()
mime = MimeTypes() mime = MimeTypes()
while content is not None: while content is not None:
file_path = f"{media_folder}/{content[2]}" file_path = f"{media_folder}/{content['ZMEDIALOCALPATH']}"
data[content[0]]["messages"][content[1]]["media"] = True _id = content["_id"]
ZMESSAGE = content["ZMESSAGE"]
data[_id].messages[ZMESSAGE].media = True
if os.path.isfile(file_path): if os.path.isfile(file_path):
data[content[0]]["messages"][content[1]]["data"] = file_path data[_id].messages[ZMESSAGE].data = file_path
if content[4] is None: if content["ZVCARDSTRING"] is None:
guess = mime.guess_type(file_path)[0] guess = mime.guess_type(file_path)[0]
if guess is not None: if guess is not None:
data[content[0]]["messages"][content[1]]["mime"] = guess data[_id].messages[ZMESSAGE].mime = guess
else: else:
data[content[0]]["messages"][content[1]]["mime"] = "data/data" data[_id].messages[ZMESSAGE].mime = "data/data"
else: else:
data[content[0]]["messages"][content[1]]["mime"] = content[4] data[_id].messages[ZMESSAGE].mime = content["ZVCARDSTRING"]
else: else:
# if "https://mmg" in content[4]: # if "https://mmg" in content["ZVCARDSTRING"]:
# try: # try:
# r = requests.get(content[3]) # r = requests.get(content["ZMEDIAURL"])
# if r.status_code != 200: # if r.status_code != 200:
# raise RuntimeError() # raise RuntimeError()
# except: # except:
# data[content[0]]["messages"][content[1]]["data"] = "{The media is missing}" # data[_id].messages[ZMESSAGE].data = "{The media is missing}"
# data[content[0]]["messages"][content[1]]["mime"] = "media" # data[_id].messages[ZMESSAGE].mime = "media"
# else: # else:
data[content[0]]["messages"][content[1]]["data"] = "The media is missing" data[_id].messages[ZMESSAGE].data = "The media is missing"
data[content[0]]["messages"][content[1]]["mime"] = "media" data[_id].messages[ZMESSAGE].mime = "media"
data[content[0]]["messages"][content[1]]["meta"] = True data[_id].messages[ZMESSAGE].meta = True
if content[6] is not None: if content["ZTITLE"] is not None:
data[content[0]]["messages"][content[1]]["caption"] = content[6] data[_id].messages[ZMESSAGE].caption = content["ZTITLE"]
i += 1 i += 1
if i % 100 == 0: if i % 100 == 0:
print(f"Gathering media...({i}/{total_row_number})", end="\r") print(f"Gathering media...({i}/{total_row_number})", end="\r")
@@ -202,25 +202,27 @@ def vcard(db, data):
ON ZWAVCARDMENTION.ZMEDIAITEM = ZWAMEDIAITEM.Z_PK ON ZWAVCARDMENTION.ZMEDIAITEM = ZWAMEDIAITEM.Z_PK
INNER JOIN ZWAMESSAGE INNER JOIN ZWAMESSAGE
ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK""") ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK""")
rows = c.fetchall() contents = c.fetchall()
total_row_number = len(rows) total_row_number = len(contents)
print(f"\nGathering vCards...(0/{total_row_number})", end="\r") print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
base = "Message/vCards" base = "Message/vCards"
if not os.path.isdir(base): if not os.path.isdir(base):
Path(base).mkdir(parents=True, exist_ok=True) Path(base).mkdir(parents=True, exist_ok=True)
for index, row in enumerate(rows): for index, content in enumerate(contents):
file_name = "".join(x for x in row[3] if x.isalnum()) file_name = "".join(x for x in content["ZVCARDNAME"] if x.isalnum())
file_name = file_name.encode('utf-8')[:251].decode('utf-8', 'ignore') file_name = file_name.encode('utf-8')[:251].decode('utf-8', 'ignore')
file_path = os.path.join(base, f"{file_name}.vcf") file_path = os.path.join(base, f"{file_name}.vcf")
if not os.path.isfile(file_path): if not os.path.isfile(file_path):
with open(file_path, "w", encoding="utf-8") as f: with open(file_path, "w", encoding="utf-8") as f:
f.write(row[4]) f.write(content["ZVCARDSTRING"])
data[row[2]]["messages"][row[1]]["data"] = row[3] + \ _id = content["_id"]
ZMESSAGE = content["ZMESSAGE"]
data[_id].messages[ZMESSAGE].data = content["ZVCARDNAME"] + \
"The vCard file cannot be displayed here, " \ "The vCard file cannot be displayed here, " \
f"however it should be located at {file_path}" f"however it should be located at {file_path}"
data[row[2]]["messages"][row[1]]["mime"] = "text/x-vcard" data[_id].messages[ZMESSAGE].mime = "text/x-vcard"
data[row[2]]["messages"][row[1]]["media"] = True data[_id].messages[ZMESSAGE].media = True
data[row[2]]["messages"][row[1]]["meta"] = True data[_id].messages[ZMESSAGE].meta = True
print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r") print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r")
@@ -256,7 +258,7 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
w3css = os.path.join(offline_static, "w3.css") w3css = os.path.join(offline_static, "w3.css")
for current, contact in enumerate(data): for current, contact in enumerate(data):
if len(data[contact]["messages"]) == 0: if len(data[contact].messages) == 0:
continue continue
phone_number = contact.split('@')[0] phone_number = contact.split('@')[0]
if "-" in contact: if "-" in contact:
@@ -264,11 +266,11 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
else: else:
file_name = phone_number file_name = phone_number
if data[contact]["name"] is not None: if data[contact].name is not None:
if file_name != "": if file_name != "":
file_name += "-" file_name += "-"
file_name += data[contact]["name"].replace("/", "-") file_name += data[contact].name.replace("/", "-")
name = data[contact]["name"] name = data[contact].name
else: else:
name = phone_number name = phone_number
@@ -278,7 +280,7 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
f.write( f.write(
template.render( template.render(
name=name, name=name,
msgs=data[contact]["messages"].values(), msgs=data[contact].messages.values(),
my_avatar=None, my_avatar=None,
their_avatar=f"WhatsApp/Avatars/{contact}.j", their_avatar=f"WhatsApp/Avatars/{contact}.j",
w3css=w3css w3css=w3css