Bnaya's assorted features

This commit is contained in:
Bnaya Peretz
2024-07-08 23:06:41 +03:00
committed by KnugiHK
parent 33763b5f41
commit be469aed93
7 changed files with 201 additions and 6 deletions

11
.gitignore vendored
View File

@@ -134,3 +134,14 @@ dmypy.json
*.onefile-build/
*.exe
__main__
# Dev time intermediates & temp files
result/
WhatsApp/
/*.db
/*.db-*
/myout
/msgstore.db
/myout-json
.vscode/

View File

@@ -7,10 +7,17 @@ import shutil
import json
import string
import glob
try:
import vobject
except ModuleNotFoundError:
vcards_deps_installed = False
else:
vcards_deps_installed = True
from Whatsapp_Chat_Exporter import exported_handler, android_handler
from Whatsapp_Chat_Exporter import ios_handler, ios_media_handler
from Whatsapp_Chat_Exporter.contacts_names_from_vcards import ContactsNamesFromVCards, readVCardsFile
from Whatsapp_Chat_Exporter.data_model import ChatStore
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType, is_chat_empty
from Whatsapp_Chat_Exporter.utility import check_update, import_from_json
from argparse import ArgumentParser, SUPPRESS
from datetime import datetime
@@ -85,6 +92,18 @@ def main():
type=str,
const="result.json",
help="Save the result to a single JSON file (default if present: result.json)")
parser.add_argument(
'--avoidJSONEnsureAscii',
dest='avoid_json_ensure_ascii',
default=False,
action='store_true',
help="Don't encode non-ascii chars in the output json files")
parser.add_argument(
'--prettyPrintJson',
dest='pretty_print_json',
default=False,
action='store_true',
help="Pretty print the output json")
parser.add_argument(
'-d',
'--db',
@@ -239,6 +258,13 @@ def main():
metavar="phone number",
help="Exclude chats that match the supplied phone number"
)
parser.add_argument(
"--filter-empty",
dest="filter_empty",
default=False,
action='store_true',
help="Exclude empty chats or with zero messages with content"
)
parser.add_argument(
"--per-chat",
dest="json_per_chat",
@@ -253,6 +279,20 @@ def main():
action='store_true',
help="Create a copy of the media seperated per chat in <MEDIA>/separated/ directory"
)
parser.add_argument(
"--enrich-names-from-vcards",
dest="enrich_names_from_vcards",
default=None,
help="Path to an exported vcf file from google contacts export, add names missing from wab database"
)
parser.add_argument(
"--default-country-code-for-enrich-names-from-vcards",
dest="default_country_code_for_enrich_names_from_vcards",
default=None,
help="When numbers in enrich-names-from-vcards does not have country code, this will be used. 1 is for US, 66 for Thailand etc. most likely use the number of your own country"
)
args = parser.parse_args()
# Check for updates
@@ -317,9 +357,19 @@ def main():
if not chat.isnumeric():
parser.error("Enter a phone number in the chat filter. See https://wts.knugi.dev/docs?dest=chat")
filter_chat = (args.filter_chat_include, args.filter_chat_exclude)
if args.enrich_names_from_vcards is not None and args.default_country_code_for_enrich_names_from_vcards is None:
parser.error("When --enrich-names-from-vcards is provided, you must also set --default-country-code-for-enrich-names-from-vcards")
data = {}
contacts_names_from_vcards_enricher = ContactsNamesFromVCards()
if args.enrich_names_from_vcards is not None:
if not vcards_deps_installed:
parser.error("To use --enrich-names-from-vcards, you must install whatsapp-chat-exporter[vcards]")
contacts_names_from_vcards_enricher.load_vcf_file(args.enrich_names_from_vcards, args.default_country_code_for_enrich_names_from_vcards)
if args.android:
contacts = android_handler.contacts
messages = android_handler.messages
@@ -429,6 +479,12 @@ def main():
if args.android:
android_handler.calls(db, data, args.timezone_offset, filter_chat)
if not args.no_html:
if contacts_names_from_vcards_enricher.should_enrich_names_from_vCards():
contacts_names_from_vcards_enricher.enrich_names_from_vCards(data)
if (args.filter_empty):
data = {k: v for k, v in data.items() if not is_chat_empty(v)}
create_html(
data,
args.output,
@@ -487,11 +543,18 @@ def main():
)
if args.json and not args.import_json:
if (args.filter_empty):
data = {k: v for k, v in data.items() if not is_chat_empty(v)}
if contacts_names_from_vcards_enricher.should_enrich_names_from_vCards():
contacts_names_from_vcards_enricher.enrich_names_from_vCards(data)
if isinstance(data[next(iter(data))], ChatStore):
data = {jik: chat.to_json() for jik, chat in data.items()}
if not args.json_per_chat:
with open(args.json, "w") as f:
data = json.dumps(data)
data = json.dumps(data, ensure_ascii=not args.avoid_json_ensure_ascii, indent=2 if args.pretty_print_json else None)
print(f"\nWriting JSON file...({int(len(data)/1024/1024)}MB)")
f.write(data)
else:
@@ -506,7 +569,8 @@ def main():
else:
contact = jik.replace('+', '')
with open(f"{args.json}/{contact}.json", "w") as f:
f.write(json.dumps(data[jik]))
file_content_to_write = json.dumps(data[jik], ensure_ascii=not args.avoid_json_ensure_ascii, indent=2 if args.pretty_print_json else None)
f.write(file_content_to_write)
print(f"Writing JSON file...({index + 1}/{total})", end="\r")
print()
else:

View File

@@ -158,6 +158,8 @@ def contacts(db, data):
c.execute("""SELECT count() FROM wa_contacts""")
total_row_number = c.fetchone()[0]
print(f"Processing contacts...({total_row_number})")
if total_row_number == 0:
print("No contacts profiles found in database, consider using --enrich-names-from-vcards when exported contacts from google")
c.execute("""SELECT jid, COALESCE(display_name, wa_name) as display_name, status FROM wa_contacts; """)
row = c.fetchone()

View File

@@ -0,0 +1,88 @@
import itertools
from typing import List, TypedDict
try:
import vobject
except ModuleNotFoundError:
vcards_deps_installed = False
else:
vcards_deps_installed = True
class ContactsNamesFromVCards:
    """Fill in missing chat names using contacts loaded from a vCard file.

    ``self.l`` holds the (number, name) pairs returned by ``readVCardsFile``;
    it is empty until ``load_vcf_file`` has been called.
    """

    def __init__(self) -> None:
        self.l = []

    def should_enrich_names_from_vCards(self) -> bool:
        """Return True when at least one contact has been loaded."""
        return len(self.l) > 0

    def load_vcf_file(self, vcfFilePath: str, default_country_calling_code: str) -> None:
        """Load the contacts of ``vcfFilePath`` into this instance.

        Args:
            vcfFilePath: Path of the vCard (.vcf) file to read.
            default_country_calling_code: Calling code handed to
                ``readVCardsFile`` for numbers that carry none.

        Raises:
            Exception: If the optional ``vobject`` dependency is missing.
        """
        if not vcards_deps_installed:
            raise Exception('Invariant: vobject is missing')
        self.l = readVCardsFile(vcfFilePath, default_country_calling_code)

    def enrich_names_from_vCards(self, chats) -> None:
        """Set ``name`` on every chat that has none and matches a loaded contact.

        A chat matches a contact when its key in ``chats`` starts with the
        contact's number. Chats that already have a non-None ``name`` are left
        untouched, so the first matching contact wins.

        Args:
            chats: Mapping of string keys to chat objects (presumably WhatsApp
                JIDs to ChatStore instances -- verify against the caller).
        """
        for number, name in self.l:
            # short number must be a bad contact, lets skip it
            if len(number) <= 5:
                continue
            # Iterate the mapping directly instead of building a filtered copy
            # for every contact.
            for chat_id, chat in chats.items():
                if not chat_id.startswith(number):
                    continue
                # Covers both "attribute absent" and "attribute is None".
                if getattr(chat, 'name', None) is None:
                    setattr(chat, 'name', name)
def readVCardsFile(vcfFilePath, default_country_calling_code: str):
    """Read a vCard file and return its contacts as [number, name] pairs.

    Entries without a formatted name (``fn``) or without any phone number
    (``tel``) are skipped. The remaining contacts are passed to
    ``createNumberToNameDicts`` together with the default calling code.

    Args:
        vcfFilePath: Path of the vCard (.vcf) file to read.
        default_country_calling_code: Calling code used for numbers that do
            not carry one themselves.

    Returns:
        Whatever ``createNumberToNameDicts`` returns for the parsed contacts.
    """
    contacts = []
    # Decode explicitly as UTF-8 rather than with the platform default
    # encoding, which mangles or rejects non-ASCII names (e.g. on Windows).
    # NOTE(review): assumes the export is UTF-8, as Google Contacts exports
    # are expected to be -- confirm for other sources.
    with open(vcfFilePath, mode="r", encoding="utf-8") as f:
        for row in vobject.readComponents(f):
            # A contact is only usable with both a name and a phone number.
            if not hasattr(row, 'fn') or not hasattr(row, 'tel'):
                continue
            contact: ExportedGoogleContactVCARDRawNumbers = {
                "full_name": row.fn.value,
                "numbers": [tel.value for tel in row.tel_list],
            }
            contacts.append(contact)
    return createNumberToNameDicts(contacts, default_country_calling_code)
def filter_dict_by_prefix(d, prefix: str):
    """Return a new dict holding the entries of ``d`` whose key starts with ``prefix``."""
    selected = {}
    for key, value in d.items():
        if key.startswith(prefix):
            selected[key] = value
    return selected
def createNumberToNameDicts(inContacts, default_country_calling_code: str):
    """Flatten contacts into a list of [normalized number, display name] pairs.

    Each phone number of each contact yields one pair. When a contact has more
    than one number, its name gets a 1-based " (n)" suffix per number so the
    resulting names stay distinguishable.

    Args:
        inContacts: Iterable of dicts with "full_name" and "numbers" keys.
        default_country_calling_code: Passed through to ``normalize_number``.

    Returns:
        A list of two-element lists: [normalized number, name].
    """
    outContacts = []
    for contact in inContacts:
        numbers = contact['numbers']
        has_several_numbers = len(numbers) > 1
        for i, num in enumerate(numbers):
            normalized = normalize_number(num, default_country_calling_code)
            if has_several_numbers:
                label = f"{contact['full_name']} ({i+1})"
            else:
                label = contact['full_name']
            outContacts.append([normalized, label])
    return outContacts
class ExportedGoogleContactVCARDRawNumbers(TypedDict):
    """One contact as read from a vCard file, before its numbers are normalized."""
    # Value of the vCard entry's ``fn`` field.
    full_name: str
    # Raw values of the entry's ``tel`` fields, exactly as found in the file.
    numbers: List[str]
def normalize_number(number: str, default_country_calling_code: str):
    """Normalize a phone number to digits prefixed with a country calling code.

    Parentheses, spaces and dashes are removed. A number written with a
    leading "+" or "00" is taken to include its calling code already; any
    other number loses one leading zero (if present) and gets
    ``default_country_calling_code`` prepended.

    Args:
        number: Phone number as written in the contact.
        default_country_calling_code: Calling code for numbers without one.

    Returns:
        The normalized number as a string.
    """
    cleaned = number
    for separator in ('(', ')', ' ', '-'):
        cleaned = cleaned.replace(separator, '')

    # International notation: the calling code is already part of the number.
    if cleaned.startswith('+'):
        return cleaned.replace('+', '')
    if cleaned.startswith('00'):
        return cleaned[2:]

    # National notation: drop the trunk zero, then add the default code.
    if cleaned.startswith('0'):
        cleaned = cleaned[1:]
    return default_country_calling_code + cleaned

View File

@@ -0,0 +1,22 @@
# from contacts_names_from_vcards import readVCardsFile
from Whatsapp_Chat_Exporter.contacts_names_from_vcards import normalize_number, readVCardsFile
def test_readVCardsFile():
    """Reading the sample "contacts.vcf" must produce at least one entry."""
    loaded_contacts = readVCardsFile("contacts.vcf", "973")
    assert len(loaded_contacts) >= 1
def test_createNumberToNameDicts():
    # Placeholder: this test has no assertions yet and always passes.
    pass
def test_fuzzy_match_numbers():
    # Placeholder: this test has no assertions yet and always passes.
    pass
def test_normalize_number():
    """normalize_number handles trunk zeros, "00"/"+" prefixes and separators."""
    # (raw number, default calling code, expected result)
    cases = [
        ('0531234567', '1', '1531234567'),
        ('001531234567', '2', '1531234567'),
        ('+1531234567', '34', '1531234567'),
        ('053(123)4567', '34', '34531234567'),
        ('0531-234-567', '58', '58531234567'),
    ]
    for raw_number, calling_code, expected in cases:
        assert normalize_number(raw_number, calling_code) == expected

View File

@@ -344,3 +344,10 @@ class JidType(IntEnum):
GROUP = 1
SYSTEM_BROADCAST = 5
STATUS = 11
def _is_message_empty(message):
    """Return True when the message has neither text (``data``) nor media."""
    has_text = message.data is not None and message.data != ""
    if has_text:
        return False
    return not message.media
def is_chat_empty(chat: ChatStore):
    """Return True when the chat has no messages or only empty ones.

    A message counts as empty when ``_is_message_empty`` says so.
    """
    if len(chat.messages) == 0:
        return True
    for message in chat.messages.values():
        if not _is_message_empty(message):
            return False
    return True

View File

@@ -55,9 +55,10 @@ setuptools.setup(
'crypt12': ["pycryptodome"],
'crypt14': ["pycryptodome"],
'crypt15': ["pycryptodome", "javaobj-py3"],
'all': ["pycryptodome", "javaobj-py3"],
'everything': ["pycryptodome", "javaobj-py3"],
'backup': ["pycryptodome", "javaobj-py3"]
'all': ["pycryptodome", "javaobj-py3", "vobject"],
'everything': ["pycryptodome", "javaobj-py3", "vobject"],
'backup': ["pycryptodome", "javaobj-py3"],
'vcards': ["vobject", "pycryptodome", "javaobj-py3"],
},
entry_points={
"console_scripts": [