mirror of
https://github.com/KnugiHK/WhatsApp-Chat-Exporter.git
synced 2026-01-28 21:30:43 +00:00
Lower the demand of RAM
This commit is contained in:
123
extract.py
123
extract.py
@@ -59,51 +59,6 @@ while content is not None:
|
||||
print(f"Gathering messages...({i}/{total_row_number})", end="\r")
|
||||
content = c.fetchone()
|
||||
print(f"Gathering messages...({total_row_number}/{total_row_number})", end="\r")
|
||||
# Get media
|
||||
|
||||
c.execute("""SELECT count() FROM message_media""")
|
||||
total_row_number = c.fetchone()[0]
|
||||
print(f"\nGathering media...(0/{total_row_number})", end="\r")
|
||||
i = 0
|
||||
c.execute("""SELECT messages.key_remote_jid, message_row_id, file_path, message_url, mime_type, media_key FROM message_media INNER JOIN messages ON message_media.message_row_id = messages._id ORDER BY messages.key_remote_jid ASC""")
|
||||
content = c.fetchone()
|
||||
mime = MimeTypes()
|
||||
while content is not None:
|
||||
file_path = f"WhatsApp/{content[2]}"
|
||||
if os.path.isfile(file_path):
|
||||
with open(file_path, "rb") as f:
|
||||
data[content[0]]["messages"][content[1]]["data"] = base64.b64encode(f.read()).decode("utf-8")
|
||||
data[content[0]]["messages"][content[1]]["media"] = True
|
||||
if content[4] is None:
|
||||
guess = mime.guess_type(file_path)[0]
|
||||
if guess is not None:
|
||||
data[content[0]]["messages"][content[1]]["mime"] = guess
|
||||
else:
|
||||
data[content[0]]["messages"][content[1]]["mime"] = "image/jpeg"
|
||||
else:
|
||||
data[content[0]]["messages"][content[1]]["mime"] = content[4]
|
||||
else:
|
||||
if "https://mmg" in content[4]:
|
||||
try:
|
||||
r = requests.get(content[3])
|
||||
if r.status_code != 200:
|
||||
raise RuntimeError()
|
||||
except:
|
||||
data[content[0]]["messages"][content[1]]["data"] = "{The media is missing}"
|
||||
data[content[0]]["messages"][content[1]]["media"] = True
|
||||
data[content[0]]["messages"][content[1]]["mime"] = "media"
|
||||
else:
|
||||
open('temp.file', 'wb').write(r.content)
|
||||
open('temp.asdasda', "a").write(content[3])
|
||||
else:
|
||||
data[content[0]]["messages"][content[1]]["data"] = "{The media is missing}"
|
||||
data[content[0]]["messages"][content[1]]["media"] = True
|
||||
data[content[0]]["messages"][content[1]]["mime"] = "media"
|
||||
i += 1
|
||||
if i % 1000 == 0:
|
||||
print(f"Gathering media...({i}/{total_row_number})", end="\r")
|
||||
content = c.fetchone()
|
||||
print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
templateLoader = jinja2.FileSystemLoader(searchpath="./")
|
||||
templateEnv = jinja2.Environment(loader=templateLoader)
|
||||
@@ -111,7 +66,7 @@ templateEnv.globals.update(determine_day=determine_day)
|
||||
TEMPLATE_FILE = "whatsapp.html"
|
||||
template = templateEnv.get_template(TEMPLATE_FILE)
|
||||
|
||||
total_row_number = len(data)
|
||||
total_row_number_html = len(data)
|
||||
print(f"\nCreating HTML...(0/{total_row_number})", end="\r")
|
||||
|
||||
if len(sys.argv) < 3:
|
||||
@@ -122,30 +77,74 @@ else:
|
||||
if not os.path.isdir(output_folder):
|
||||
os.mkdir(output_folder)
|
||||
|
||||
for current, i in enumerate(data):
|
||||
if len(data[i]["messages"]) == 0:
|
||||
list_of_contact = tuple(data.keys())
|
||||
|
||||
for current, contact in enumerate(list_of_contact):
|
||||
if len(data[contact]["messages"]) == 0:
|
||||
continue
|
||||
phone_number = i.split('@')[0]
|
||||
if "-"in i:
|
||||
# Get media
|
||||
c.execute("""SELECT count() FROM message_media INNER JOIN messages ON message_media.message_row_id = messages._id WHERE messages.key_remote_jid=?""", [contact])
|
||||
total_row_number = c.fetchone()[0]
|
||||
phone_number = contact.split('@')[0]
|
||||
print(f"Creating HTML...({current}/{total_row_number_html})|Gathering media with {phone_number}...(0/{total_row_number})", end="\r")
|
||||
j = 0
|
||||
c.execute("""SELECT messages.key_remote_jid, message_row_id, file_path, message_url, mime_type, media_key, file_hash FROM message_media INNER JOIN messages ON message_media.message_row_id = messages._id WHERE messages.key_remote_jid=? ORDER BY messages.key_remote_jid ASC""", [contact])
|
||||
contents = c.fetchall()
|
||||
mime = MimeTypes()
|
||||
for content in contents:
|
||||
file_path = f"WhatsApp/{content[2]}"
|
||||
data[content[0]]["messages"][content[1]]["media"] = True
|
||||
if os.path.isfile(file_path):
|
||||
with open(file_path, "rb") as f:
|
||||
data[content[0]]["messages"][content[1]]["data"] = base64.b64encode(f.read()).decode("utf-8")
|
||||
if content[4] is None:
|
||||
guess = mime.guess_type(file_path)[0]
|
||||
if guess is not None:
|
||||
data[content[0]]["messages"][content[1]]["mime"] = guess
|
||||
else:
|
||||
data[content[0]]["messages"][content[1]]["mime"] = "image/jpeg"
|
||||
else:
|
||||
data[content[0]]["messages"][content[1]]["mime"] = content[4]
|
||||
else:
|
||||
# if "https://mmg" in content[2]:
|
||||
# try:
|
||||
# r = requests.get(content[2])
|
||||
# if r.status_code != 200:
|
||||
# raise RuntimeError()
|
||||
# except:
|
||||
# data[contact]["messages"][content[0]]["data"] = "{The media is missing}"
|
||||
# data[contact]["messages"][content[0]]["media"] = True
|
||||
# data[contact]["messages"][content[0]]["mime"] = "media"
|
||||
# else:
|
||||
# open('temp.file', 'wb').write(r.content)
|
||||
# open('temp.asdasda', "a").write(content[3])
|
||||
# else:
|
||||
data[content[0]]["messages"][content[1]]["data"] = "{The media is missing}"
|
||||
data[content[0]]["messages"][content[1]]["mime"] = "media"
|
||||
j += 1
|
||||
if j % 100 == 0:
|
||||
print(f"Creating HTML...({current}/{total_row_number_html})|Gathering media with {phone_number}...({j}/{total_row_number}) ", end="\r")
|
||||
print(f"Creating HTML...({current}/{total_row_number_html})|Gathering media with {phone_number}...({total_row_number}/{total_row_number}) ", end="\r")
|
||||
if "-" in contact:
|
||||
file_name = ""
|
||||
else:
|
||||
file_name = phone_number
|
||||
|
||||
if data[i]["name"] is not None:
|
||||
if data[contact]["name"] is not None:
|
||||
if file_name != "":
|
||||
file_name += "-"
|
||||
file_name += data[i]["name"].replace("/", "-")
|
||||
file_name += data[contact]["name"].replace("/", "-")
|
||||
if data[contact]["name"]:
|
||||
name = data[contact]["name"]
|
||||
else:
|
||||
name = phone_number
|
||||
|
||||
with open(f"{output_folder}/{file_name}.html", "w", encoding="utf-8") as f:
|
||||
f.write(template.render(name=data[i]["name"] if data[i]["name"] is not None else phone_number, msgs=data[i]["messages"].values()))
|
||||
if current % 10 == 0:
|
||||
print(f"Creating HTML...({current}/{total_row_number})", end="\r")
|
||||
f.write(template.render(name=name, msgs=data[contact]["messages"].values()))
|
||||
|
||||
print(f"Creating HTML...({total_row_number}/{total_row_number})", end="\r")
|
||||
del data[contact]
|
||||
#if current % 10 == 0:
|
||||
#print(f"Creating HTML...({current}/{total_row_number})", end="\r")
|
||||
|
||||
with open("result.json", "w") as f:
|
||||
data = json.dumps(data)
|
||||
print(f"\nWriting JSON file...({int(len(data)/1024/1024)}MB)")
|
||||
f.write(data)
|
||||
|
||||
print("Everything is done!")
|
||||
print(f"Creating HTML...({total_row_number_html}/{total_row_number_html}) ")
|
||||
print("Everything is done! ")
|
||||
|
||||
Reference in New Issue
Block a user