Merge branch 'dev'

This commit is contained in:
KnugiHK
2021-12-28 19:26:02 +08:00
8 changed files with 205 additions and 101 deletions

View File

@@ -12,6 +12,7 @@ A customizable Android and iPhone Whatsapp database parser that will give you th
First, install the exporter by:
```shell
pip install whatsapp-chat-exporter
pip install whatsapp-chat-exporter[android_backup] & :: Optional, if you want it to support decrypting Android WhatsApp backup.
```
Then, create a working directory in somewhere you want
```shell
@@ -56,7 +57,6 @@ Do an iPhone Backup with iTunes first.
If you want to work on an encrypted iPhone Backup, you should install iphone_backup_decrypt from [KnugiHK/iphone_backup_decrypt](https://github.com/KnugiHK/iphone_backup_decrypt) before you run the extract_iphone_media.py.
```sh
pip install biplist pycryptodome & :: Optional, since the pip will install these dependencies automatically.
pip install git+https://github.com/KnugiHK/iphone_backup_decrypt
```
### Extracting
@@ -88,13 +88,15 @@ Options:
-m MEDIA, --media=MEDIA
Path to WhatsApp media folder
-b BACKUP, --backup=BACKUP
Path to iPhone/Android (must be used together with -k)
Path to Android (must be used together with -k)/iPhone
WhatsApp backup
-o OUTPUT, --output=OUTPUT
Output to specific directory
-j, --json Save the result to a single JSON file
-d DB, --db=DB Path to database file
-k KEY, --key=KEY Path to key file
-t TEMPLATE, --template=TEMPLATE
Path to custom HTML template
```
# To do

View File

@@ -1 +1 @@
__version__ = "0.6"
__version__ = "0.6"

View File

@@ -1,5 +1,6 @@
from .__init__ import __version__
from Whatsapp_Chat_Exporter import extract, extract_iphone, extract_iphone_media
from Whatsapp_Chat_Exporter import extract, extract_iphone
from Whatsapp_Chat_Exporter import extract_iphone_media
from optparse import OptionParser
import os
import sqlite3
@@ -40,7 +41,8 @@ def main():
"--backup",
dest="backup",
default=None,
help="Path to Android (must be used together with -k)/iPhone WhatsApp backup")
help="Path to Android (must be used together "
"with -k)/iPhone WhatsApp backup")
parser.add_option(
"-o",
"--output",
@@ -67,6 +69,12 @@ def main():
default=None,
help="Path to key file"
)
parser.add_option(
"-t",
"--template",
dest="template",
default=None,
help="Path to custom HTML template")
(options, args) = parser.parse_args()
if options.android and options.iphone:
@@ -94,8 +102,10 @@ def main():
print("Decryption key specified, decrypting WhatsApp backup...")
key = open(options.key, "rb").read()
db = open(options.backup, "rb").read()
if not extract.decrypt_backup(db, key, msg_db):
print("Dependencies of decrypt_backup are not present. For details, see README.md")
is_crypt14 = False if "crypt12" in options.backup else True
if not extract.decrypt_backup(db, key, msg_db, is_crypt14):
print("Dependencies of decrypt_backup are not "
"present. For details, see README.md")
return False
if options.wa is None:
contact_db = "wa.db"
@@ -103,14 +113,14 @@ def main():
contact_db = options.wa
if options.media is None:
options.media = "WhatsApp"
if len(args) == 1:
msg_db = args[0]
if os.path.isfile(contact_db):
with sqlite3.connect(contact_db) as db:
contacts(db, data)
elif options.iphone:
messages = extract_iphone.messages
media = extract_iphone.media
@@ -137,7 +147,7 @@ def main():
messages(db, data)
media(db, data, options.media)
vcard(db, data)
create_html(data, options.output)
create_html(data, options.output, options.template)
if not os.path.isdir(f"{options.output}/{options.media}"):
shutil.move(options.media, f"{options.output}/")

View File

@@ -8,6 +8,9 @@ import requests
import shutil
import re
import pkgutil
from pathlib import Path
from bleach import clean as sanitize
from markupsafe import Markup
from datetime import datetime
from mimetypes import MimeTypes
try:
@@ -19,6 +22,10 @@ else:
support_backup = True
def sanitize_except(html):
return Markup(sanitize(html, tags=["br"]))
def determine_day(last, current):
last = datetime.fromtimestamp(last).date()
current = datetime.fromtimestamp(current).date()
@@ -28,20 +35,27 @@ def determine_day(last, current):
return current
def decrypt_backup(database, key, output):
def decrypt_backup(database, key, output, crypt14=True):
if not support_backup:
return False
if len(key) != 158:
raise ValueError("The key file must be 158 bytes")
if len(database) < 191:
raise ValueError("The database file must be at least 191 bytes")
t1 = key[30:62]
t2 = database[15:47]
if crypt14:
if len(database) < 191:
raise ValueError("The crypt14 file must be at least 191 bytes")
t2 = database[15:47]
iv = database[67:83]
db_ciphertext = database[191:]
else:
if len(database) < 67:
raise ValueError("The crypt12 file must be at least 67 bytes")
t2 = database[3:35]
iv = database[51:67]
db_ciphertext = database[67:-20]
if t1 != t2:
raise ValueError("The signature of key file and backup file mismatch")
iv = database[67:83]
db_ciphertext = database[191:]
main_key = key[126:]
cipher = AES.new(main_key, AES.MODE_GCM, iv)
db_compressed = cipher.decrypt(db_ciphertext)
@@ -105,7 +119,9 @@ def messages(db, data):
"timestamp": content[3]/1000,
"time": datetime.fromtimestamp(content[3]/1000).strftime("%H:%M"),
"media": False,
"key_id": content[13]
"key_id": content[13],
"meta": False,
"data": None
}
if "-" in content[0] and content[2] == 0:
name = None
@@ -140,8 +156,9 @@ def messages(db, data):
try:
int(content[4])
except ValueError:
msg = "{The group name changed to "f"{content[4]}"" }"
msg = f"The group name changed to {content[4]}"
data[content[0]]["messages"][content[1]]["data"] = msg
data[content[0]]["messages"][content[1]]["meta"] = True
else:
del data[content[0]]["messages"][content[1]]
else:
@@ -160,15 +177,16 @@ def messages(db, data):
name_left = data[content[8]]["name"]
else:
name_left = content[8].split('@')[0]
msg = "{"f"{name_left}"f" added {name_right or 'You'}""}"
msg = f"{name_left} added {name_right or 'You'}"
else:
msg = "{"f"Added {name_right or 'You'}""}"
msg = f"Added {name_right or 'You'}"
elif b"\xac\xed\x00\x05\x74\x00" in thumb_image:
# Changed number
original = content[8].split('@')[0]
changed = thumb_image[7:].decode().split('@')[0]
msg = "{"f"{original} changed to {changed}""}"
msg = f"{original} changed to {changed}"
data[content[0]]["messages"][content[1]]["data"] = msg
data[content[0]]["messages"][content[1]]["meta"] = True
else:
if content[4] is None:
del data[content[0]]["messages"][content[1]]
@@ -180,20 +198,34 @@ def messages(db, data):
else:
if content[2] == 1:
if content[5] == 5 and content[6] == 7:
msg = "{Message deleted}"
msg = "Message deleted"
data[content[0]]["messages"][content[1]]["meta"] = True
else:
if content[9] == "5":
msg = "{ Location shared: "f"{content[10], content[11]}"" }"
msg = f"Location shared: {content[10], content[11]}"
data[content[0]]["messages"][content[1]]["meta"] = True
else:
msg = content[4]
if msg is not None:
if "\r\n" in msg:
msg = msg.replace("\r\n", "<br>")
if "\n" in msg:
msg = msg.replace("\n", "<br>")
else:
if content[5] == 0 and content[6] == 7:
msg = "{Message deleted}"
msg = "Message deleted"
data[content[0]]["messages"][content[1]]["meta"] = True
else:
if content[9] == "5":
msg = "{ Location shared: "f"{content[10], content[11]}"" }"
msg = f"Location shared: {content[10], content[11]}"
data[content[0]]["messages"][content[1]]["meta"] = True
else:
msg = content[4]
if msg is not None:
if "\r\n" in msg:
msg = msg.replace("\r\n", "<br>")
if "\n" in msg:
msg = msg.replace("\n", "<br>")
data[content[0]]["messages"][content[1]]["data"] = msg
@@ -201,8 +233,7 @@ def messages(db, data):
if i % 1000 == 0:
print(f"Gathering messages...({i}/{total_row_number})", end="\r")
content = c.fetchone()
print(
f"Gathering messages...({total_row_number}/{total_row_number})", end="\r")
print(f"Gathering messages...({total_row_number}/{total_row_number})", end="\r")
def media(db, data, media_folder):
@@ -248,8 +279,9 @@ def media(db, data, media_folder):
# data[content[0]]["messages"][content[1]]["media"] = True
# data[content[0]]["messages"][content[1]]["mime"] = "media"
# else:
data[content[0]]["messages"][content[1]]["data"] = "{The media is missing}"
data[content[0]]["messages"][content[1]]["data"] = "The media is missing"
data[content[0]]["messages"][content[1]]["mime"] = "media"
data[content[0]]["messages"][content[1]]["meta"] = True
i += 1
if i % 100 == 0:
print(f"Gathering media...({i}/{total_row_number})", end="\r")
@@ -272,27 +304,34 @@ def vcard(db, data):
total_row_number = len(rows)
print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
base = "WhatsApp/vCards"
if not os.path.isdir(base):
Path(base).mkdir(parents=True, exist_ok=True)
for index, row in enumerate(rows):
if not os.path.isdir(base):
os.mkdir(base)
file_name = "".join(x for x in row[3] if x.isalnum())
file_path = f"{base}/{file_name}.vcf"
if not os.path.isfile(file_path):
with open(file_path, "w", encoding="utf-8") as f:
f.write(row[2])
data[row[1]]["messages"][row[0]]["data"] = row[3] + \
"{ The vCard file cannot be displayed here, however it " \
"should be located at " + file_path + "}"
"The vCard file cannot be displayed here, " \
f"however it should be located at {file_path}"
data[row[1]]["messages"][row[0]]["mime"] = "text/x-vcard"
data[row[1]]["messages"][row[0]]["meta"] = True
print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r")
def create_html(data, output_folder):
templateLoader = jinja2.FileSystemLoader(searchpath=os.path.dirname(__file__))
def create_html(data, output_folder, template=None):
if template is None:
template_dir = os.path.dirname(__file__)
template_file = "whatsapp.html"
else:
template_dir = os.path.dirname(template)
template_file = os.path.basename(template)
templateLoader = jinja2.FileSystemLoader(searchpath=template_dir)
templateEnv = jinja2.Environment(loader=templateLoader)
templateEnv.globals.update(determine_day=determine_day)
TEMPLATE_FILE = "whatsapp.html"
template = templateEnv.get_template(TEMPLATE_FILE)
templateEnv.filters['sanitize_except'] = sanitize_except
template = templateEnv.get_template(template_file)
total_row_number = len(data)
print(f"\nCreating HTML...(0/{total_row_number})", end="\r")

View File

@@ -7,12 +7,19 @@ import os
import requests
import shutil
import pkgutil
from pathlib import Path
from bleach import clean as sanitize
from markupsafe import Markup
from datetime import datetime
from mimetypes import MimeTypes
APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1))
def sanitize_except(html):
return Markup(sanitize(html, tags=["br"]))
def determine_day(last, current):
last = datetime.fromtimestamp(last).date()
current = datetime.fromtimestamp(current).date()
@@ -62,7 +69,9 @@ def messages(db, data):
"time": datetime.fromtimestamp(ts).strftime("%H:%M"),
"media": False,
"reply": None,
"caption": None
"caption": None,
"meta": False,
"data": None
}
if "-" in content[0] and content[2] == 0:
name = None
@@ -87,8 +96,9 @@ def messages(db, data):
try:
int(content[4])
except ValueError:
msg = "{The group name changed to "f"{content[4]}"" }"
msg = f"The group name changed to {content[4]}"
data[content[0]]["messages"][content[1]]["data"] = msg
data[content[0]]["messages"][content[1]]["meta"] = True
else:
del data[content[0]]["messages"][content[1]]
else:
@@ -99,14 +109,26 @@ def messages(db, data):
# real message
if content[2] == 1:
if content[5] == 14:
msg = "{Message deleted}"
msg = "Message deleted"
data[content[0]]["messages"][content[1]]["meta"] = True
else:
msg = content[4]
if msg is not None:
if "\r\n" in msg:
msg = msg.replace("\r\n", "<br>")
if "\n" in msg:
msg = msg.replace("\n", "<br>")
else:
if content[5] == 14:
msg = "{Message deleted}"
msg = "Message deleted"
data[content[0]]["messages"][content[1]]["meta"] = True
else:
msg = content[4]
if msg is not None:
if "\r\n" in msg:
msg = msg.replace("\r\n", "<br>")
if "\n" in msg:
msg = msg.replace("\n", "<br>")
data[content[0]]["messages"][content[1]]["data"] = msg
i += 1
if i % 1000 == 0:
@@ -138,7 +160,7 @@ def media(db, data, media_folder):
content = c.fetchone()
mime = MimeTypes()
while content is not None:
file_path = f"Message/{content[2]}"
file_path = f"{media_folder}/{content[2]}"
data[content[0]]["messages"][content[1]]["media"] = True
if os.path.isfile(file_path):
@@ -161,8 +183,9 @@ def media(db, data, media_folder):
# data[content[0]]["messages"][content[1]]["data"] = "{The media is missing}"
# data[content[0]]["messages"][content[1]]["mime"] = "media"
# else:
data[content[0]]["messages"][content[1]]["data"] = "{The media is missing}"
data[content[0]]["messages"][content[1]]["data"] = "The media is missing"
data[content[0]]["messages"][content[1]]["mime"] = "media"
data[content[0]]["messages"][content[1]]["meta"] = True
if content[6] is not None:
data[content[0]]["messages"][content[1]]["caption"] = content[6]
i += 1
@@ -190,29 +213,36 @@ def vcard(db, data):
total_row_number = len(rows)
print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
base = "Message/vCards"
if not os.path.isdir(base):
Path(base).mkdir(parents=True, exist_ok=True)
for index, row in enumerate(rows):
if not os.path.isdir(base):
os.mkdir(base)
file_name = "".join(x for x in row[3] if x.isalnum())
file_path = f"{base}/{file_name[:200]}.vcf"
if not os.path.isfile(file_path):
with open(file_path, "w", encoding="utf-8") as f:
f.write(row[4])
data[row[2]]["messages"][row[1]]["data"] = row[3] + \
"{ The vCard file cannot be displayed here, however it " \
"should be located at " + file_path + "}"
"The vCard file cannot be displayed here, " \
f"however it should be located at {file_path}"
data[row[2]]["messages"][row[1]]["mime"] = "text/x-vcard"
data[row[2]]["messages"][row[1]]["media"] = True
data[row[2]]["messages"][row[1]]["meta"] = True
print(f"Gathering vCards...({index + 1}/{total_row_number})", end="\r")
def create_html(data, output_folder):
templateLoader = jinja2.FileSystemLoader(searchpath=os.path.dirname(__file__))
def create_html(data, output_folder, template=None):
if template is None:
template_dir = os.path.dirname(__file__)
template_file = "whatsapp.html"
else:
template_dir = os.path.dirname(template)
template_file = os.path.basename(template)
templateLoader = jinja2.FileSystemLoader(searchpath=template_dir)
templateEnv = jinja2.Environment(loader=templateLoader)
templateEnv.globals.update(determine_day=determine_day)
TEMPLATE_FILE = "whatsapp.html"
template = templateEnv.get_template(TEMPLATE_FILE)
templateEnv.filters['sanitize_except'] = sanitize_except
template = templateEnv.get_template(template_file)
total_row_number = len(data)
print(f"\nCreating HTML...(0/{total_row_number})", end="\r")

View File

@@ -6,21 +6,24 @@ import os
import getpass
try:
from iphone_backup_decrypt import EncryptedBackup, RelativePath
except:
except ModuleNotFoundError:
support_encrypted = False
else:
support_encrypted = True
def extract_encrypted(base_dir, password):
backup = EncryptedBackup(backup_directory=base_dir, passphrase=password)
print("Decrypting WhatsApp database...")
backup.extract_file(relative_path=RelativePath.WHATSAPP_MESSAGES, output_filename="7c7fba66680ef796b916b067077cc246adacf01d")
backup.extract_file(relative_path=RelativePath.WHATSAPP_CONTACTS, output_filename="ContactsV2.sqlite")
backup.extract_file(relative_path=RelativePath.WHATSAPP_MESSAGES,
output_filename="7c7fba66680ef796b916b067077cc246adacf01d")
backup.extract_file(relative_path=RelativePath.WHATSAPP_CONTACTS,
output_filename="ContactsV2.sqlite")
data = backup.execute_sql("""SELECT count()
FROM Files
WHERE relativePath
LIKE 'Message/Media/%'"""
)
)
total_row_number = data[0][0]
print(f"Gathering media...(0/{total_row_number})", end="\r")
data = backup.execute_sql("""SELECT fileID,
@@ -30,7 +33,7 @@ def extract_encrypted(base_dir, password):
FROM Files
WHERE relativePath
LIKE 'Message/Media/%'"""
)
)
if not os.path.isdir("Message"):
os.mkdir("Message")
if not os.path.isdir("Message/Media"):
@@ -43,7 +46,7 @@ def extract_encrypted(base_dir, password):
flags = row[2]
file = row[3]
if flags == 2:
try:
try:
os.mkdir(destination)
except FileExistsError:
pass
@@ -56,6 +59,7 @@ def extract_encrypted(base_dir, password):
print(f"Gathering media...({i}/{total_row_number})", end="\r")
print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
def is_encrypted(base_dir):
with sqlite3.connect(f"{base_dir}/Manifest.db") as f:
c = f.cursor()
@@ -68,6 +72,7 @@ def is_encrypted(base_dir):
else:
return False
def extract_media(base_dir):
if is_encrypted(base_dir):
if not support_encrypted:
@@ -81,7 +86,7 @@ def extract_media(base_dir):
wts_db = os.path.join(base_dir, "7c/7c7fba66680ef796b916b067077cc246adacf01d")
if not os.path.isfile(wts_db):
print("WhatsApp database not found.")
sys.exit(1)
exit()
else:
shutil.copyfile(wts_db, "7c7fba66680ef796b916b067077cc246adacf01d")
with sqlite3.connect(f"{base_dir}/Manifest.db") as manifest:

View File

@@ -72,29 +72,37 @@
<a href="#{{msg.reply}}" style="color: #168acc;">"{{ msg.quoted_data or 'media' }}"</a>
</div>
{% endif %}
{% if msg.media == false %}
{% filter escape %}{{ msg.data or "{This message is not supported yet}" | replace('\n', '<br>') }}{% endfilter %}
{% if msg.meta == true or msg.media == false and msg.data is none %}
<div style="text-align: center;" class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar">
<p>{{ msg.data or 'This message is not supported' }}</p>
</div>
{% else %}
{% if "image/" in msg.mime %}
<a href="{{ msg.data }}"><img src="{{ msg.data }}" /></a>
{% elif "audio/" in msg.mime %}
<audio controls="controls" autobuffer="autobuffer">
<source src="{{ msg.data }}" />
</audio>
{% elif "video/" in msg.mime %}
<video controls="controls" autobuffer="autobuffer">
<source src="{{ msg.data }}" />
</video>
{% elif "/" in msg.mime %}
{The file cannot be displayed here, however it should be located at {{ msg.data }}}
{% if msg.media == false %}
{{ msg.data | sanitize_except() }}
{% else %}
{% filter escape %}{{ msg.data }}{% endfilter %}
{% if "image/" in msg.mime %}
<a href="{{ msg.data }}"><img src="{{ msg.data }}" /></a>
{% elif "audio/" in msg.mime %}
<audio controls="controls" autobuffer="autobuffer">
<source src="{{ msg.data }}" />
</audio>
{% elif "video/" in msg.mime %}
<video controls="controls" autobuffer="autobuffer">
<source src="{{ msg.data }}" />
</video>
{% elif "/" in msg.mime %}
<div style="text-align: center;" class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar">
<p>The file cannot be displayed here, however it should be located at {{ msg.data }}</p>
</div>
{% else %}
{% filter escape %}{{ msg.data }}{% endfilter %}
{% endif %}
{% if msg.caption is not none %}
<br>
{{ msg.caption }}
{% endif %}
{% endif %}
{% if msg.caption is not none %}
<br>
{{ msg.caption }}
{% endif %}
{% endif %}
</div>
</div>
<div class="w3-col m2 l2" style="padding-left: 10px"><img src="{{ my_avatar }}" onerror="this.style.display='none'"></div>
@@ -120,27 +128,35 @@
<a href="#{{msg.reply}}" style="color: #168acc;">"{{ msg.quoted_data or 'media' }}"</a>
</div>
{% endif %}
{% if msg.media == false %}
{% filter escape %}{{ msg.data or "{This message is not supported yet}" }}{% endfilter %}
{% if msg.meta == true or msg.media == false and msg.data is none %}
<div style="text-align: center;" class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar">
<p>{{ msg.data or 'This message is not supported' }}</p>
</div>
{% else %}
{% if "image/" in msg.mime %}
<a href="{{ msg.data }}"><img src="{{ msg.data }}" /></a>
{% elif "audio/" in msg.mime %}
<audio controls="controls" autobuffer="autobuffer">
<source src="{{ msg.data }}" />
</audio>
{% elif "video/" in msg.mime %}
<video controls="controls" autobuffer="autobuffer">
<source src="{{ msg.data }}" />
</video>
{% elif "/" in msg.mime %}
{The file cannot be displayed here, however it should be located at {{ msg.data }}}
{% if msg.media == false %}
{{ msg.data | sanitize_except() }}
{% else %}
{% filter escape %}{{ msg.data }}{% endfilter %}
{% endif %}
{% if msg.caption is not none %}
<br>
{{ msg.caption }}
{% if "image/" in msg.mime %}
<a href="{{ msg.data }}"><img src="{{ msg.data }}" /></a>
{% elif "audio/" in msg.mime %}
<audio controls="controls" autobuffer="autobuffer">
<source src="{{ msg.data }}" />
</audio>
{% elif "video/" in msg.mime %}
<video controls="controls" autobuffer="autobuffer">
<source src="{{ msg.data }}" />
</video>
{% elif "/" in msg.mime %}
<div style="text-align: center;" class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar">
<p>The file cannot be displayed here, however it should be located at {{ msg.data }}</p>
</div>
{% else %}
{% filter escape %}{{ msg.data }}{% endfilter %}
{% endif %}
{% if msg.caption is not none %}
<br>
{{ msg.caption }}
{% endif %}
{% endif %}
{% endif %}
</div>

View File

@@ -12,12 +12,13 @@ setuptools.setup(
version=version,
author="KnugiHK",
author_email="info@knugi.com",
description="A Whatsapp database parser that will give you the history of your Whatsapp conversations in HTML and JSON.",
description="A Whatsapp database parser that will give you the "
"history of your Whatsapp conversations in HTML and JSON.",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/KnugiHK/Whatsapp-Chat-Exporter",
packages=setuptools.find_packages(),
package_data = {
package_data={
'': ['whatsapp.html']
},
classifiers=[
@@ -36,9 +37,10 @@ setuptools.setup(
],
python_requires='>=3.7',
install_requires=[
'jinja2'
'jinja2',
'bleach'
],
extras_require = {
extras_require={
'android_backup': ["pycryptodome"]
},
entry_points={
@@ -46,4 +48,4 @@ setuptools.setup(
"wtsexporter = Whatsapp_Chat_Exporter.__main__:main"
]
}
)
)