diff --git a/.gitignore b/.gitignore index dd82d78..5831f34 100644 --- a/.gitignore +++ b/.gitignore @@ -134,3 +134,14 @@ dmypy.json *.onefile-build/ *.exe __main__ + + +# Dev time intermediates & temp files +result/ +WhatsApp/ +/*.db +/*.db-* +/myout +/msgstore.db +/myout-json +.vscode/ \ No newline at end of file diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py index a332181..236801a 100644 --- a/Whatsapp_Chat_Exporter/__main__.py +++ b/Whatsapp_Chat_Exporter/__main__.py @@ -7,10 +7,17 @@ import shutil import json import string import glob +try: + import vobject +except ModuleNotFoundError: + vcards_deps_installed = False +else: + vcards_deps_installed = True from Whatsapp_Chat_Exporter import exported_handler, android_handler from Whatsapp_Chat_Exporter import ios_handler, ios_media_handler +from Whatsapp_Chat_Exporter.contacts_names_from_vcards import ContactsNamesFromVCards, readVCardsFile from Whatsapp_Chat_Exporter.data_model import ChatStore -from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType +from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType, is_chat_empty from Whatsapp_Chat_Exporter.utility import check_update, import_from_json from argparse import ArgumentParser, SUPPRESS from datetime import datetime @@ -85,6 +92,18 @@ def main(): type=str, const="result.json", help="Save the result to a single JSON file (default if present: result.json)") + parser.add_argument( + '--avoidJSONEnsureAscii', + dest='avoid_json_ensure_ascii', + default=False, + action='store_true', + help="Don't encode non-ascii chars in the output json files") + parser.add_argument( + '--prettyPrintJson', + dest='pretty_print_json', + default=False, + action='store_true', + help="Pretty print the output json") parser.add_argument( '-d', '--db', @@ -239,6 +258,13 @@ def main(): metavar="phone number", help="Exclude chats that match the supplied phone number" ) + parser.add_argument( + "--filter-empty", + 
dest="filter_empty", + default=False, + action='store_true', + help="Exclude empty chats or with zero messages with content" + ) parser.add_argument( "--per-chat", dest="json_per_chat", @@ -253,6 +279,20 @@ def main(): action='store_true', help="Create a copy of the media seperated per chat in /separated/ directory" ) + parser.add_argument( + "--enrich-names-from-vcards", + dest="enrich_names_from_vcards", + default=None, + help="Path to an exported vcf file from google contacts export, add names missing from wab database" + ) + + parser.add_argument( + "--default-country-code-for-enrich-names-from-vcards", + dest="default_country_code_for_enrich_names_from_vcards", + default=None, + help="When numbers in enrich-names-from-vcards does not have country code, this will be used. 1 is for US, 66 for Thailand etc. most likely use the number of your own country" + ) + args = parser.parse_args() # Check for updates @@ -317,9 +357,19 @@ def main(): if not chat.isnumeric(): parser.error("Enter a phone number in the chat filter. 
See https://wts.knugi.dev/docs?dest=chat") filter_chat = (args.filter_chat_include, args.filter_chat_exclude) + if args.enrich_names_from_vcards is not None and args.default_country_code_for_enrich_names_from_vcards is None: + parser.error("When --enrich-names-from-vcards is provided, you must also set --default-country-code-for-enrich-names-from-vcards") data = {} + contacts_names_from_vcards_enricher = ContactsNamesFromVCards() + + if args.enrich_names_from_vcards is not None: + if not vcards_deps_installed: + parser.error("To use --enrich-names-from-vcards, you must install whatsapp-chat-exporter[vcards]") + + contacts_names_from_vcards_enricher.load_vcf_file(args.enrich_names_from_vcards, args.default_country_code_for_enrich_names_from_vcards) + if args.android: contacts = android_handler.contacts messages = android_handler.messages @@ -429,6 +479,12 @@ def main(): if args.android: android_handler.calls(db, data, args.timezone_offset, filter_chat) if not args.no_html: + if contacts_names_from_vcards_enricher.should_enrich_names_from_vCards(): + contacts_names_from_vcards_enricher.enrich_names_from_vCards(data) + + if (args.filter_empty): + data = {k: v for k, v in data.items() if not is_chat_empty(v)} + create_html( data, args.output, @@ -487,11 +543,18 @@ def main(): ) if args.json and not args.import_json: + if (args.filter_empty): + data = {k: v for k, v in data.items() if not is_chat_empty(v)} + + if contacts_names_from_vcards_enricher.should_enrich_names_from_vCards(): + contacts_names_from_vcards_enricher.enrich_names_from_vCards(data) + if isinstance(data[next(iter(data))], ChatStore): data = {jik: chat.to_json() for jik, chat in data.items()} + if not args.json_per_chat: with open(args.json, "w") as f: - data = json.dumps(data) + data = json.dumps(data, ensure_ascii=not args.avoid_json_ensure_ascii, indent=2 if args.pretty_print_json else None) print(f"\nWriting JSON file...({int(len(data)/1024/1024)}MB)") f.write(data) else: @@ -506,7 +569,8 @@ def 
"""Fill in missing chat names from contacts exported as a vCard (.vcf) file."""

import itertools
from typing import List, TypedDict

try:
    import vobject
except ModuleNotFoundError:
    # vobject is optional; the flag lets callers fail with a clear message.
    vcards_deps_installed = False
else:
    vcards_deps_installed = True

_ASCII_DIGITS = frozenset("0123456789")
# Characters kept while cleaning a phone number. Everything else (spaces,
# dashes, dots, parentheses, tabs, ...) is treated as a visual separator.
_PHONE_CHARS = _ASCII_DIGITS | {"+"}


class ExportedGoogleContactVCARDRawNumbers(TypedDict):
    """One vCard entry: its formatted name and its raw phone numbers."""

    full_name: str
    numbers: List[str]


class ContactsNamesFromVCards:
    """Keep ``[number, name]`` pairs from a vCard file and apply them to chats."""

    def __init__(self) -> None:
        # [normalized_number, display_name] pairs; empty until a file is loaded.
        self.l = []

    def should_enrich_names_from_vCards(self) -> bool:
        """Return True when at least one contact has been loaded."""
        return bool(self.l)

    def load_vcf_file(self, vcfFilePath: str, default_country_calling_code: str) -> None:
        """Load the contacts of ``vcfFilePath`` into this instance.

        Raises:
            RuntimeError: if the optional ``vobject`` dependency is missing.
        """
        if not vcards_deps_installed:
            raise RuntimeError('Invariant: vobject is missing')
        self.l = readVCardsFile(vcfFilePath, default_country_calling_code)

    def enrich_names_from_vCards(self, chats) -> None:
        """Set ``name`` on every chat that has none and whose key starts with a known number.

        ``chats`` maps string keys to chat objects. Chats that already have a
        name are left untouched, so the first matching contact wins.
        """
        for number, name in self.l:
            # A short number must be a bad contact; it would also match far
            # too many keys as a prefix, so skip it.
            if len(number) <= 5:
                continue

            # NOTE(review): this is a plain prefix match, so any key that
            # merely begins with the same digits is matched too - confirm
            # that this is intended.
            for chat in filter_dict_by_prefix(chats, number).values():
                if getattr(chat, 'name', None) is None:
                    chat.name = name


def readVCardsFile(vcfFilePath, default_country_calling_code: str):
    """Parse a vCard file into a list of ``[normalized_number, name]`` pairs.

    Cards without a formatted name (``FN``) or without any ``TEL`` entry are
    skipped.

    Raises:
        RuntimeError: if the optional ``vobject`` dependency is missing.
    """
    if not vcards_deps_installed:
        raise RuntimeError('Invariant: vobject is missing')

    contacts: List[ExportedGoogleContactVCARDRawNumbers] = []
    # Read as UTF-8 explicitly: the platform default encoding (e.g. cp1252 on
    # Windows) cannot decode non-ASCII contact names.
    with open(vcfFilePath, mode="r", encoding="utf-8") as f:
        for card in vobject.readComponents(f):
            if not hasattr(card, 'fn') or not hasattr(card, 'tel'):
                continue

            contacts.append({
                "full_name": card.fn.value,
                "numbers": [tel.value for tel in card.tel_list],
            })

    return createNumberToNameDicts(contacts, default_country_calling_code)


def filter_dict_by_prefix(d, prefix: str):
    """Return the items of ``d`` whose key starts with ``prefix``."""
    return {k: v for k, v in d.items() if k.startswith(prefix)}


def _contact_entries(contact, default_country_calling_code: str):
    """Return one ``[normalized_number, label]`` pair per number of ``contact``."""
    numbers = contact['numbers']
    has_several = len(numbers) > 1
    return [
        [
            normalize_number(raw, default_country_calling_code),
            # Several numbers for one contact get a 1-based suffix so the
            # resulting labels stay distinguishable.
            f"{contact['full_name']} ({position})" if has_several else contact['full_name'],
        ]
        for position, raw in enumerate(numbers, start=1)
    ]


def createNumberToNameDicts(inContacts, default_country_calling_code: str):
    """Flatten contacts into a list of ``[normalized_number, name]`` pairs."""
    return list(itertools.chain.from_iterable(
        _contact_entries(contact, default_country_calling_code)
        for contact in inContacts
    ))


def normalize_number(number: str, default_country_calling_code: str):
    """Return ``number`` as bare digits including the country calling code.

    A number starting with ``+`` or ``00`` already carries its country code.
    Any other number loses one leading zero and gets
    ``default_country_calling_code`` prepended. Returns ``''`` when ``number``
    contains no digits at all.
    """
    cleaned = ''.join(ch for ch in number if ch in _PHONE_CHARS)
    if not cleaned.strip('+'):
        # Nothing to normalize; do not invent a number from the country code.
        return ''

    if cleaned.startswith('+'):
        result = cleaned
    elif cleaned.startswith('00'):
        result = cleaned[2:]
    else:
        # Remove the national trunk prefix (a single leading zero).
        national = cleaned[1:] if cleaned.startswith('0') else cleaned
        # Accept the country code written as "66" as well as "+66".
        country_code = ''.join(
            ch for ch in default_country_calling_code if ch in _ASCII_DIGITS
        )
        result = country_code + national

    return result.replace('+', '')
"javaobj-py3"] + 'all': ["pycryptodome", "javaobj-py3", "vobject"], + 'everything': ["pycryptodome", "javaobj-py3", "vobject"], + 'backup': ["pycryptodome", "javaobj-py3"], + 'vcards': ["vobject", "pycryptodome", "javaobj-py3"], }, entry_points={ "console_scripts": [