Implement split outputs #23

This commit is contained in:
KnugiHK
2023-06-08 18:16:47 +08:00
parent dbdfdaedcf
commit f63b180500
6 changed files with 184 additions and 68 deletions

View File

@@ -125,9 +125,13 @@ def main():
parser.add_argument( parser.add_argument(
"--size", "--size",
"--output-size", "--output-size",
"--split",
dest="size", dest="size",
nargs='?',
type=int,
const=0,
default=None, default=None,
help="Maximum size of a single output file in bytes, 0 for auto (not yet implemented)" help="Maximum (Rough) size of a single output file in bytes, 0 for auto"
) )
parser.add_argument( parser.add_argument(
"--no-html", "--no-html",
@@ -216,7 +220,10 @@ def main():
elif args.iphone: elif args.iphone:
import sys import sys
if "--iphone" in sys.argv: if "--iphone" in sys.argv:
print("WARNING: The --iphone flag is deprecated and will be removed in the future. Use --ios instead.") print(
"WARNING: The --iphone flag is deprecated and will"
"be removed in the future. Use --ios instead."
)
messages = extract_iphone.messages messages = extract_iphone.messages
media = extract_iphone.media media = extract_iphone.media
vcard = extract_iphone.vcard vcard = extract_iphone.vcard

View File

@@ -19,9 +19,16 @@ class ChatStore():
del self.messages[id] del self.messages[id]
def to_json(self): def to_json(self):
serialized_msgs = {id : msg.to_json() for id,msg in self.messages.items()} serialized_msgs = {id: msg.to_json() for id, msg in self.messages.items()}
return {'name' : self.name, 'messages' : serialized_msgs} return {'name' : self.name, 'messages' : serialized_msgs}
def get_last_message(self):
    """Return the last message yielded by ``self.messages.values()``.

    For a plain ``dict`` this is the most recently inserted message
    (presumably ``self.messages`` is a dict — confirm against ``__init__``).

    Raises:
        IndexError: if the chat holds no messages.
    """
    # The values are materialised into a tuple so the final one can be indexed.
    return tuple(self.messages.values())[-1]
def get_messages(self):
    """Return the values view of ``self.messages`` (the messages without their ids)."""
    return self.messages.values()
class Message(): class Message():
def __init__(self, from_me: Union[bool,int], timestamp: int, time: str, key_id: int): def __init__(self, from_me: Union[bool,int], timestamp: int, time: str, key_id: int):
self.from_me = bool(from_me) self.from_me = bool(from_me)

View File

@@ -12,7 +12,7 @@ from pathlib import Path
from mimetypes import MimeTypes from mimetypes import MimeTypes
from hashlib import sha256 from hashlib import sha256
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, Crypt from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, rendering, sanitize_except, determine_day, Crypt
from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS
try: try:
@@ -558,7 +558,8 @@ def create_html(
w3css = os.path.join(offline_static, "w3.css") w3css = os.path.join(offline_static, "w3.css")
for current, contact in enumerate(data): for current, contact in enumerate(data):
if len(data[contact].messages) == 0: chat = data[contact]
if len(chat.messages) == 0:
continue continue
phone_number = contact.split('@')[0] phone_number = contact.split('@')[0]
if "-" in contact: if "-" in contact:
@@ -566,25 +567,62 @@ def create_html(
else: else:
file_name = phone_number file_name = phone_number
if data[contact].name is not None: if chat.name is not None:
if file_name != "": if file_name != "":
file_name += "-" file_name += "-"
file_name += data[contact].name.replace("/", "-") file_name += chat.name.replace("/", "-")
name = data[contact].name name = chat.name
else: else:
name = phone_number name = phone_number
safe_file_name = ''
safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ") safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
f.write( if maximum_size is not None:
template.render( current_size = 0
name=name, current_page = 1
msgs=data[contact].messages.values(), render_box = []
my_avatar=None, if maximum_size == 0:
their_avatar=f"WhatsApp/Avatars/{contact}.j", maximum_size = MAX_SIZE
w3css=w3css last_msg = chat.get_last_message().key_id
for message in chat.get_messages():
if message.data is not None and not message.meta and not message.media:
current_size += len(message.data) + ROW_SIZE
else:
current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes
if current_size > maximum_size:
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
rendering(
output_file_name,
template,
name,
render_box,
contact,
w3css,
f"{safe_file_name}-{current_page + 1}.html"
) )
render_box = [message]
current_size = 0
current_page += 1
else:
if message.key_id == last_msg:
if current_page == 1:
output_file_name = f"{output_folder}/{safe_file_name}.html"
else:
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
rendering(
output_file_name,
template,
name,
render_box,
contact,
w3css,
False
) )
else:
render_box.append(message)
else:
output_file_name = f"{output_folder}/{safe_file_name}.html"
rendering(output_file_name, template, name, chat.get_messages(), contact, w3css, False)
if current % 10 == 0: if current % 10 == 0:
print(f"Creating HTML...({current}/{total_row_number})", end="\r") print(f"Creating HTML...({current}/{total_row_number})", end="\r")

View File

@@ -2,15 +2,13 @@
import sqlite3 import sqlite3
import json import json
import string
import jinja2 import jinja2
import os import os
import shutil import shutil
from pathlib import Path from pathlib import Path
from datetime import datetime
from mimetypes import MimeTypes from mimetypes import MimeTypes
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import sanitize_except, determine_day, APPLE_TIME from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, rendering, sanitize_except, determine_day, APPLE_TIME
def messages(db, data): def messages(db, data):
@@ -56,7 +54,7 @@ def messages(db, data):
data[_id].add_message(Z_PK, Message( data[_id].add_message(Z_PK, Message(
from_me=content["ZISFROMME"], from_me=content["ZISFROMME"],
timestamp=ts, timestamp=ts,
time=ts, # Could be bug time=ts, # TODO: Could be bug
key_id=content["ZSTANZAID"][:17], key_id=content["ZSTANZAID"][:17],
)) ))
if "-" in _id and content["ZISFROMME"] == 0: if "-" in _id and content["ZISFROMME"] == 0:
@@ -226,7 +224,14 @@ def vcard(db, data):
print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r") print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r")
def create_html(data, output_folder, template=None, embedded=False, offline_static=False, maximum_size=None): def create_html(
data,
output_folder,
template=None,
embedded=False,
offline_static=False,
maximum_size=None
):
if template is None: if template is None:
template_dir = os.path.dirname(__file__) template_dir = os.path.dirname(__file__)
template_file = "whatsapp.html" template_file = "whatsapp.html"
@@ -258,7 +263,8 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
w3css = os.path.join(offline_static, "w3.css") w3css = os.path.join(offline_static, "w3.css")
for current, contact in enumerate(data): for current, contact in enumerate(data):
if len(data[contact].messages) == 0: chat = data[contact]
if len(chat.messages) == 0:
continue continue
phone_number = contact.split('@')[0] phone_number = contact.split('@')[0]
if "-" in contact: if "-" in contact:
@@ -266,26 +272,62 @@ def create_html(data, output_folder, template=None, embedded=False, offline_stat
else: else:
file_name = phone_number file_name = phone_number
if data[contact].name is not None: if chat.name is not None:
if file_name != "": if file_name != "":
file_name += "-" file_name += "-"
file_name += data[contact].name.replace("/", "-") file_name += chat.name.replace("/", "-")
name = data[contact].name name = chat.name
else: else:
name = phone_number name = phone_number
safe_file_name = ''
safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ") safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
f.write( if maximum_size is not None:
template.render( current_size = 0
name=name, current_page = 1
msgs=data[contact].messages.values(), render_box = []
my_avatar=None, if maximum_size == 0:
their_avatar=f"WhatsApp/Avatars/{contact}.j", maximum_size = MAX_SIZE
w3css=w3css last_msg = chat.get_last_message().key_id
for message in chat.get_messages():
if message.data is not None and not message.meta and not message.media:
current_size += len(message.data) + ROW_SIZE
else:
current_size += ROW_SIZE + 100 # Assume media and meta HTML are 100 bytes
if current_size > maximum_size:
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
rendering(
output_file_name,
template,
name,
render_box,
contact,
w3css,
f"{safe_file_name}-{current_page + 1}.html"
) )
render_box = [message]
current_size = 0
current_page += 1
else:
if message.key_id == last_msg:
if current_page == 1:
output_file_name = f"{output_folder}/{safe_file_name}.html"
else:
output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
rendering(
output_file_name,
template,
name,
render_box,
contact,
w3css,
False
) )
else:
render_box.append(message)
else:
output_file_name = f"{output_folder}/{safe_file_name}.html"
rendering(output_file_name, template, name, chat.get_messages(), contact, w3css, False)
if current % 10 == 0: if current % 10 == 0:
print(f"Creating HTML...({current}/{total_row_number})", end="\r") print(f"Creating HTML...({current}/{total_row_number})", end="\r")

View File

@@ -4,6 +4,10 @@ from datetime import datetime
from enum import Enum from enum import Enum
# Size ceiling (4 MiB) substituted by create_html when the requested size is 0.
MAX_SIZE = 4 * 1024 ** 2
# Fixed per-message amount added to the running page-size estimate (0x300 == 768).
ROW_SIZE = 0x300
def sanitize_except(html): def sanitize_except(html):
return Markup(sanitize(html, tags=["br"])) return Markup(sanitize(html, tags=["br"]))
@@ -17,28 +21,6 @@ def determine_day(last, current):
return current return current
# Android Specific
# Table of ("iv", "db") integer pairs, one dict per entry.
# NOTE(review): presumably known header offsets for crypt14 backups — confirm with the caller.
CRYPT14_OFFSETS = tuple(
    {"iv": iv, "db": db}
    for iv, db in (
        (67, 191),
        (67, 190),
        (66, 99),
        (67, 193),
        (67, 194),
    )
)
class Crypt(Enum):
    """Enumeration whose member values are the integers 15, 14 and 12.

    NOTE(review): presumably the WhatsApp backup formats (.crypt15 /
    .crypt14 / .crypt12) — confirm against the code that consumes it.
    """

    CRYPT15 = 15
    CRYPT14 = 14
    CRYPT12 = 12
def brute_force_offset(max_iv=200, max_db=200):
    """Yield every candidate ``(iv, iv + 16, db)`` triple.

    ``iv`` runs over ``range(max_iv)`` in the outer loop and ``db`` over
    ``range(max_db)`` in the inner loop, so ``db`` varies fastest.
    A limit of zero or less yields nothing.
    """
    for iv in range(max_iv):
        for db in range(max_db):
            # The middle element is always 16 past the first.
            # NOTE(review): presumably the end of a 16-byte IV window — confirm.
            yield iv, iv + 16, db
def check_update(): def check_update():
import urllib.request import urllib.request
import json import json
@@ -70,6 +52,42 @@ def check_update():
print("You are using the latest version of WhatsApp Chat Exporter.") print("You are using the latest version of WhatsApp Chat Exporter.")
return 0 return 0
# Shared by the Android and iOS exporters
def rendering(output_file_name, template, name, msgs, contact, w3css, next):
    """Render one chat page with ``template`` and write it to ``output_file_name``.

    Args:
        output_file_name: Path of the HTML file to write (opened in "w" mode,
            UTF-8), so an existing file is replaced.
        template: Object whose ``render(**context)`` returns the page text
            (presumably a Jinja2 template — confirm with the callers).
        name: Passed to the template as ``name``.
        msgs: Passed to the template as ``msgs``.
        contact: Chat identifier; used to build the ``their_avatar`` path.
        w3css: Passed to the template as ``w3css``.
        next: Passed to the template as ``next``; callers pass the next page's
            file name, or ``False`` for the final page.
            NOTE: shadows the ``next`` builtin inside this function; the name is
            kept because it is part of the signature.
    """
    # Render before opening the file, so a template error cannot leave an
    # empty or truncated output file behind.
    page = template.render(
        name=name,
        msgs=msgs,
        my_avatar=None,
        their_avatar=f"WhatsApp/Avatars/{contact}.j",
        w3css=w3css,
        next=next,
    )
    with open(output_file_name, "w", encoding="utf-8") as f:
        f.write(page)
# Android Specific
# Table of ("iv", "db") integer pairs, one dict per entry.
# NOTE(review): presumably known header offsets for crypt14 backups — confirm with the caller.
CRYPT14_OFFSETS = tuple(
    {"iv": iv, "db": db}
    for iv, db in (
        (67, 191),
        (67, 190),
        (66, 99),
        (67, 193),
        (67, 194),
    )
)
class Crypt(Enum):
    """Enumeration whose member values are the integers 15, 14 and 12.

    NOTE(review): presumably the WhatsApp backup formats (.crypt15 /
    .crypt14 / .crypt12) — confirm against the code that consumes it.
    """

    CRYPT15 = 15
    CRYPT14 = 14
    CRYPT12 = 12
def brute_force_offset(max_iv=200, max_db=200):
    """Yield every candidate ``(iv, iv + 16, db)`` triple.

    ``iv`` runs over ``range(max_iv)`` in the outer loop and ``db`` over
    ``range(max_db)`` in the inner loop, so ``db`` varies fastest.
    A limit of zero or less yields nothing.
    """
    for iv in range(max_iv):
        for db in range(max_db):
            # The middle element is always 16 past the first.
            # NOTE(review): presumably the end of a 16-byte IV window — confirm.
            yield iv, iv + 16, db
# iOS Specific
APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1)) APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1))

View File

@@ -193,7 +193,11 @@
</div> </div>
</article> </article>
<footer class="w3-center"> <footer class="w3-center">
{% if next %}
<a href="./{{ next }}">Next</a>
{% else %}
End of history End of history
{% endif %}
</footer> </footer>
</body> </body>
</html> </html>