Add support for separating media files per chat

This commit is contained in:
Mert Tunc
2024-04-15 19:20:33 +03:00
parent a569fb0875
commit 0eebbcff21
6 changed files with 51 additions and 8 deletions

View File

@@ -118,7 +118,7 @@ usage: wtsexporter [-h] [-a] [-i] [-e EXPORTED] [-w WA] [-m MEDIA] [-b BACKUP] [
[-k KEY] [-t TEMPLATE] [-s] [-c] [--offline OFFLINE] [--size [SIZE]] [--no-html] [--check-update] [-k KEY] [-t TEMPLATE] [-s] [-c] [--offline OFFLINE] [--size [SIZE]] [--no-html] [--check-update]
[--assume-first-as-me] [--no-avatar] [--import] [--business] [--preserve-timestamp] [--wab WAB] [--assume-first-as-me] [--no-avatar] [--import] [--business] [--preserve-timestamp] [--wab WAB]
[--time-offset {-12 to 14}] [--date DATE] [--date-format FORMAT] [--include [phone number ...]] [--time-offset {-12 to 14}] [--date DATE] [--date-format FORMAT] [--include [phone number ...]]
[--exclude [phone number ...]] [--exclude [phone number ...]] [--create-separated-media]
A customizable Android and iPhone WhatsApp database parser that will give you the history of your WhatsApp A customizable Android and iPhone WhatsApp database parser that will give you the history of your WhatsApp
conversations in HTML and JSON. Android Backup Crypt12, Crypt14 and Crypt15 supported. conversations in HTML and JSON. Android Backup Crypt12, Crypt14 and Crypt15 supported.
@@ -164,8 +164,11 @@ options:
Include chats that match the supplied phone number Include chats that match the supplied phone number
--exclude [phone number ...] --exclude [phone number ...]
Exclude chats that match the supplied phone number Exclude chats that match the supplied phone number
--create-separated-media
Create a copy of the media separated per chat in <MEDIA>/separated/ directory
(Android only)
WhatsApp Chat Exporter: 0.9.7 Licensed with MIT WhatsApp Chat Exporter: 0.10.0 Licensed with MIT
``` ```
# To do # To do

View File

@@ -1,3 +1,3 @@
#!/usr/bin/python3 #!/usr/bin/python3
__version__ = "0.9.7" __version__ = "0.10.0"

View File

@@ -245,6 +245,13 @@ def main():
action='store_true', action='store_true',
help="Output the JSON file per chat" help="Output the JSON file per chat"
) )
# Opt-in flag; the parsed value is exposed as ``args.separate_media``.
parser.add_argument(
    "--create-separated-media",
    dest="separate_media",
    default=False,
    action='store_true',
    help="Create a copy of the media separated per chat in <MEDIA>/separated/ directory (Android only)"
)
args = parser.parse_args() args = parser.parse_args()
# Check for updates # Check for updates
@@ -264,6 +271,8 @@ def main():
parser.error("JSON file not found.") parser.error("JSON file not found.")
if args.android and args.business: if args.android and args.business:
parser.error("WhatsApp Business is only available on iOS for now.") parser.error("WhatsApp Business is only available on iOS for now.")
# The option is registered with dest="separate_media"; reading any other
# attribute name on the namespace raises AttributeError for iOS runs.
if args.ios and args.separate_media:
    parser.error("Separate media is only available on Android for now.")
if args.json_per_chat and ( if args.json_per_chat and (
(args.json[-5:] != ".json" and os.path.isfile(args.json)) or \ (args.json[-5:] != ".json" and os.path.isfile(args.json)) or \
(args.json[-5:] == ".json" and os.path.isfile(args.json[:-5])) (args.json[-5:] == ".json" and os.path.isfile(args.json[:-5]))
@@ -310,7 +319,6 @@ def main():
parser.error("Enter a phone number in the chat filter. See https://wts.knugi.dev/docs?dest=chat") parser.error("Enter a phone number in the chat filter. See https://wts.knugi.dev/docs?dest=chat")
filter_chat = (args.filter_chat_include, args.filter_chat_exclude) filter_chat = (args.filter_chat_include, args.filter_chat_exclude)
data = {} data = {}
if args.android: if args.android:
@@ -417,7 +425,7 @@ def main():
with sqlite3.connect(msg_db) as db: with sqlite3.connect(msg_db) as db:
db.row_factory = sqlite3.Row db.row_factory = sqlite3.Row
messages(db, data, args.media, args.timezone_offset, args.filter_date, filter_chat) messages(db, data, args.media, args.timezone_offset, args.filter_date, filter_chat)
media(db, data, args.media, args.filter_date, filter_chat) media(db, data, args.media, args.filter_date, filter_chat, args.separate_media)
vcard(db, data, args.media, args.filter_date, filter_chat) vcard(db, data, args.media, args.filter_date, filter_chat)
if args.android: if args.android:
android_handler.calls(db, data, args.timezone_offset, filter_chat) android_handler.calls(db, data, args.timezone_offset, filter_chat)

View File

@@ -4,6 +4,7 @@ import sqlite3
import os import os
import io import io
import hmac import hmac
import shutil
from pathlib import Path from pathlib import Path
from mimetypes import MimeTypes from mimetypes import MimeTypes
from hashlib import sha256 from hashlib import sha256
@@ -12,7 +13,7 @@ from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, DbType, determine_metadata, JidType from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, DbType, determine_metadata, JidType
from Whatsapp_Chat_Exporter.utility import rendering, Crypt, Device, get_file_name, setup_template from Whatsapp_Chat_Exporter.utility import rendering, Crypt, Device, get_file_name, setup_template
from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS, get_status_location from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS, get_status_location
from Whatsapp_Chat_Exporter.utility import get_chat_condition from Whatsapp_Chat_Exporter.utility import get_chat_condition, slugify
try: try:
import zlib import zlib
@@ -477,7 +478,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat):
print(f"Processing messages...({total_row_number}/{total_row_number})", end="\r") print(f"Processing messages...({total_row_number}/{total_row_number})", end="\r")
def media(db, data, media_folder, filter_date, filter_chat): def media(db, data, media_folder, filter_date, filter_chat, separate_media=True):
# Get media # Get media
c = db.cursor() c = db.cursor()
try: try:
@@ -569,6 +570,18 @@ def media(db, data, media_folder, filter_date, filter_chat):
message.mime = "application/octet-stream" message.mime = "application/octet-stream"
else: else:
message.mime = content["mime_type"] message.mime = content["mime_type"]
if separate_media:
    # The chat name comes from the database and is used as a directory
    # name, so it is slugified first: this strips path separators and
    # other characters that could nest, escape, or break the target folder.
    raw_name = data[content["key_remote_jid"]].name or message.sender or ""
    chat_display_name = slugify(raw_name, allow_unicode=True) or "Unknown"
    current_filename = file_path.split("/")[-1]
    # Build the path in one step to avoid a doubled "/" after "separated".
    new_folder = f"{media_folder}/separated/{chat_display_name}"
    Path(new_folder).mkdir(parents=True, exist_ok=True)
    new_path = f"{new_folder}/{current_filename}"
    # copy2 keeps the original file in place and preserves its metadata.
    shutil.copy2(file_path, new_path)
    # Point the message at the per-chat copy.
    message.data = new_path
else: else:
if False: # Block execution if False: # Block execution
try: try:

View File

@@ -205,7 +205,7 @@ def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat):
f"Processing messages...({total_row_number}/{total_row_number})", end="\r") f"Processing messages...({total_row_number}/{total_row_number})", end="\r")
def media(db, data, media_folder, filter_date, filter_chat): def media(db, data, media_folder, filter_date, filter_chat, separate_media=False):
c = db.cursor() c = db.cursor()
# Get media # Get media
c.execute(f"""SELECT count() c.execute(f"""SELECT count()

View File

@@ -3,6 +3,8 @@ import json
import os import os
from bleach import clean as sanitize from bleach import clean as sanitize
from markupsafe import Markup from markupsafe import Markup
import unicodedata
import re
from datetime import datetime from datetime import datetime
from enum import IntEnum from enum import IntEnum
from Whatsapp_Chat_Exporter.data_model import ChatStore from Whatsapp_Chat_Exporter.data_model import ChatStore
@@ -309,6 +311,23 @@ def setup_template(template, no_avatar):
APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1)) APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1))
def slugify(value, allow_unicode=False):
    """
    Convert a value into a slug made only of word characters and hyphens.

    Taken from https://github.com/django/django/blob/master/django/utils/text.py

    Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated
    dashes to single dashes. Remove characters that aren't alphanumerics,
    underscores, or hyphens. Convert to lowercase. Also strip leading and
    trailing whitespace, dashes, and underscores.

    Args:
        value: The value to slugify; it is converted with ``str()`` first.
            ``None`` yields an empty string rather than the text "none",
            so callers can chain a fallback with ``or``.
        allow_unicode: Keep non-ASCII word characters (NFKC-normalized)
            instead of dropping everything that cannot be encoded as ASCII.

    Returns:
        The slug, which may be empty when nothing usable remains.
    """
    if value is None:
        return ""
    value = str(value)
    if allow_unicode:
        value = unicodedata.normalize('NFKC', value)
    else:
        # NFKD splits accented letters into base letter + combining mark;
        # the ASCII round-trip then drops the marks and any other non-ASCII.
        value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
    value = re.sub(r'[^\w\s-]', '', value.lower())
    return re.sub(r'[-\s]+', '-', value).strip('-_')
class WhatsAppIdentifier(StrEnum): class WhatsAppIdentifier(StrEnum):
MESSAGE = "7c7fba66680ef796b916b067077cc246adacf01d" MESSAGE = "7c7fba66680ef796b916b067077cc246adacf01d"
CONTACT = "b8548dc30aa1030df0ce18ef08b882cf7ab5212f" CONTACT = "b8548dc30aa1030df0ce18ef08b882cf7ab5212f"