mirror of
https://github.com/KnugiHK/WhatsApp-Chat-Exporter.git
synced 2026-01-29 05:40:42 +00:00
Bnaya's assorted features
This commit is contained in:
11
.gitignore
vendored
11
.gitignore
vendored
@@ -134,3 +134,14 @@ dmypy.json
|
||||
*.onefile-build/
|
||||
*.exe
|
||||
__main__
|
||||
|
||||
|
||||
# Dev-time intermediates & temp files
|
||||
result/
|
||||
WhatsApp/
|
||||
/*.db
|
||||
/*.db-*
|
||||
/myout
|
||||
/msgstore.db
|
||||
/myout-json
|
||||
.vscode/
|
||||
@@ -7,10 +7,17 @@ import shutil
|
||||
import json
|
||||
import string
|
||||
import glob
|
||||
try:
|
||||
import vobject
|
||||
except ModuleNotFoundError:
|
||||
vcards_deps_installed = False
|
||||
else:
|
||||
vcards_deps_installed = True
|
||||
from Whatsapp_Chat_Exporter import exported_handler, android_handler
|
||||
from Whatsapp_Chat_Exporter import ios_handler, ios_media_handler
|
||||
from Whatsapp_Chat_Exporter.contacts_names_from_vcards import ContactsNamesFromVCards, readVCardsFile
|
||||
from Whatsapp_Chat_Exporter.data_model import ChatStore
|
||||
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType
|
||||
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType, is_chat_empty
|
||||
from Whatsapp_Chat_Exporter.utility import check_update, import_from_json
|
||||
from argparse import ArgumentParser, SUPPRESS
|
||||
from datetime import datetime
|
||||
@@ -85,6 +92,18 @@ def main():
|
||||
type=str,
|
||||
const="result.json",
|
||||
help="Save the result to a single JSON file (default if present: result.json)")
|
||||
parser.add_argument(
|
||||
'--avoidJSONEnsureAscii',
|
||||
dest='avoid_json_ensure_ascii',
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Don't encode non-ascii chars in the output json files")
|
||||
parser.add_argument(
|
||||
'--prettyPrintJson',
|
||||
dest='pretty_print_json',
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Pretty print the output json")
|
||||
parser.add_argument(
|
||||
'-d',
|
||||
'--db',
|
||||
@@ -239,6 +258,13 @@ def main():
|
||||
metavar="phone number",
|
||||
help="Exclude chats that match the supplied phone number"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--filter-empty",
|
||||
dest="filter_empty",
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Exclude empty chats or with zero messages with content"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--per-chat",
|
||||
dest="json_per_chat",
|
||||
@@ -253,6 +279,20 @@ def main():
|
||||
action='store_true',
|
||||
help="Create a copy of the media seperated per chat in <MEDIA>/separated/ directory"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enrich-names-from-vcards",
|
||||
dest="enrich_names_from_vcards",
|
||||
default=None,
|
||||
help="Path to an exported vcf file from google contacts export, add names missing from wab database"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--default-country-code-for-enrich-names-from-vcards",
|
||||
dest="default_country_code_for_enrich_names_from_vcards",
|
||||
default=None,
|
||||
help="When numbers in enrich-names-from-vcards does not have country code, this will be used. 1 is for US, 66 for Thailand etc. most likely use the number of your own country"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Check for updates
|
||||
@@ -317,9 +357,19 @@ def main():
|
||||
if not chat.isnumeric():
|
||||
parser.error("Enter a phone number in the chat filter. See https://wts.knugi.dev/docs?dest=chat")
|
||||
filter_chat = (args.filter_chat_include, args.filter_chat_exclude)
|
||||
if args.enrich_names_from_vcards is not None and args.default_country_code_for_enrich_names_from_vcards is None:
|
||||
parser.error("When --enrich-names-from-vcards is provided, you must also set --default-country-code-for-enrich-names-from-vcards")
|
||||
|
||||
data = {}
|
||||
|
||||
contacts_names_from_vcards_enricher = ContactsNamesFromVCards()
|
||||
|
||||
if args.enrich_names_from_vcards is not None:
|
||||
if not vcards_deps_installed:
|
||||
parser.error("To use --enrich-names-from-vcards, you must install whatsapp-chat-exporter[vcards]")
|
||||
|
||||
contacts_names_from_vcards_enricher.load_vcf_file(args.enrich_names_from_vcards, args.default_country_code_for_enrich_names_from_vcards)
|
||||
|
||||
if args.android:
|
||||
contacts = android_handler.contacts
|
||||
messages = android_handler.messages
|
||||
@@ -429,6 +479,12 @@ def main():
|
||||
if args.android:
|
||||
android_handler.calls(db, data, args.timezone_offset, filter_chat)
|
||||
if not args.no_html:
|
||||
if contacts_names_from_vcards_enricher.should_enrich_names_from_vCards():
|
||||
contacts_names_from_vcards_enricher.enrich_names_from_vCards(data)
|
||||
|
||||
if (args.filter_empty):
|
||||
data = {k: v for k, v in data.items() if not is_chat_empty(v)}
|
||||
|
||||
create_html(
|
||||
data,
|
||||
args.output,
|
||||
@@ -487,11 +543,18 @@ def main():
|
||||
)
|
||||
|
||||
if args.json and not args.import_json:
|
||||
if (args.filter_empty):
|
||||
data = {k: v for k, v in data.items() if not is_chat_empty(v)}
|
||||
|
||||
if contacts_names_from_vcards_enricher.should_enrich_names_from_vCards():
|
||||
contacts_names_from_vcards_enricher.enrich_names_from_vCards(data)
|
||||
|
||||
if isinstance(data[next(iter(data))], ChatStore):
|
||||
data = {jik: chat.to_json() for jik, chat in data.items()}
|
||||
|
||||
if not args.json_per_chat:
|
||||
with open(args.json, "w") as f:
|
||||
data = json.dumps(data)
|
||||
data = json.dumps(data, ensure_ascii=not args.avoid_json_ensure_ascii, indent=2 if args.pretty_print_json else None)
|
||||
print(f"\nWriting JSON file...({int(len(data)/1024/1024)}MB)")
|
||||
f.write(data)
|
||||
else:
|
||||
@@ -506,7 +569,8 @@ def main():
|
||||
else:
|
||||
contact = jik.replace('+', '')
|
||||
with open(f"{args.json}/{contact}.json", "w") as f:
|
||||
f.write(json.dumps(data[jik]))
|
||||
file_content_to_write = json.dumps(data[jik], ensure_ascii=not args.avoid_json_ensure_ascii, indent=2 if args.pretty_print_json else None)
|
||||
f.write(file_content_to_write)
|
||||
print(f"Writing JSON file...({index + 1}/{total})", end="\r")
|
||||
print()
|
||||
else:
|
||||
|
||||
@@ -158,6 +158,8 @@ def contacts(db, data):
|
||||
c.execute("""SELECT count() FROM wa_contacts""")
|
||||
total_row_number = c.fetchone()[0]
|
||||
print(f"Processing contacts...({total_row_number})")
|
||||
if total_row_number == 0:
|
||||
print("No contacts profiles found in database, consider using --enrich-names-from-vcards when exported contacts from google")
|
||||
|
||||
c.execute("""SELECT jid, COALESCE(display_name, wa_name) as display_name, status FROM wa_contacts; """)
|
||||
row = c.fetchone()
|
||||
|
||||
88
Whatsapp_Chat_Exporter/contacts_names_from_vcards.py
Normal file
88
Whatsapp_Chat_Exporter/contacts_names_from_vcards.py
Normal file
@@ -0,0 +1,88 @@
|
||||
import itertools
|
||||
from typing import List, TypedDict
|
||||
|
||||
try:
|
||||
import vobject
|
||||
except ModuleNotFoundError:
|
||||
vcards_deps_installed = False
|
||||
else:
|
||||
vcards_deps_installed = True
|
||||
|
||||
class ContactsNamesFromVCards:
    """Fill in missing chat names using contacts loaded from a vCard file.

    Loaded contacts are kept in ``self.l`` as ``[number, name]`` pairs, in the
    form returned by ``readVCardsFile``.
    """

    def __init__(self) -> None:
        # Empty until load_vcf_file() is called.
        self.l = []

    def should_enrich_names_from_vCards(self):
        """Return True when at least one contact has been loaded."""
        return len(self.l) > 0

    def load_vcf_file(self, vcfFilePath: str, default_country_calling_code: str):
        """Load contacts from ``vcfFilePath`` through ``readVCardsFile``.

        Args:
            vcfFilePath: Path of the vCard file to read.
            default_country_calling_code: Passed through to ``readVCardsFile``.

        Raises:
            Exception: If the optional ``vobject`` dependency is not installed.
        """
        if not vcards_deps_installed:
            raise Exception('Invariant: vobject is missing')
        self.l = readVCardsFile(vcfFilePath, default_country_calling_code)

    def enrich_names_from_vCards(self, chats):
        """Assign contact names to chats that do not have a name yet.

        Args:
            chats: Mapping of chat key (string) to chat object. A chat is
                updated in place when its key starts with a loaded number and
                its ``name`` attribute is missing or ``None``.
        """
        for number, name in self.l:
            # A very short number must be a bad contact, so skip it.
            if len(number) <= 5:
                continue

            # NOTE(review): prefix matching also hits any key that merely
            # begins with the same digits - confirm this is intended.
            for key, chat in chats.items():
                if key.startswith(number) and getattr(chat, 'name', None) is None:
                    chat.name = name
|
||||
|
||||
|
||||
def readVCardsFile(vcfFilePath, default_country_calling_code: str):
    """Read a vCard file and return its contacts as number/name pairs.

    Cards without an ``fn`` (full name) or without any ``tel`` entry are
    skipped.

    Args:
        vcfFilePath: Path of the ``.vcf`` file to read.
        default_country_calling_code: Passed to ``createNumberToNameDicts``
            for numbers that carry no country calling code.

    Returns:
        The result of ``createNumberToNameDicts`` for the contacts read.
    """
    contacts = []
    # Decode explicitly as UTF-8 instead of relying on the locale default,
    # which is not UTF-8 on every platform and would garble non-ASCII names.
    with open(vcfFilePath, mode="r", encoding="utf-8") as f:
        for card in vobject.readComponents(f):
            if not hasattr(card, 'fn') or not hasattr(card, 'tel'):
                continue

            contact: ExportedGoogleContactVCARDRawNumbers = {
                "full_name": card.fn.value,
                "numbers": [tel.value for tel in card.tel_list],
            }
            contacts.append(contact)

    return createNumberToNameDicts(contacts, default_country_calling_code)
|
||||
|
||||
|
||||
def filter_dict_by_prefix(d, prefix: str):
    """Return a new dict with only the entries of ``d`` whose key starts with ``prefix``."""
    matching = {}
    for key, value in d.items():
        if key.startswith(prefix):
            matching[key] = value
    return matching
|
||||
|
||||
def createNumberToNameDicts(inContacts, default_country_calling_code: str):
    """Flatten contacts into a list of ``[normalized_number, name]`` pairs.

    When a contact has more than one number, each pair's name gets a
    1-based ``" (n)"`` suffix; a contact with a single number keeps its plain
    full name.
    """
    pairs = []
    for entry in inContacts:
        for position, raw_number in enumerate(entry['numbers'], start=1):
            normalized = normalize_number(raw_number, default_country_calling_code)
            if len(entry['numbers']) > 1:
                label = f"{entry['full_name']} ({position})"
            else:
                label = entry['full_name']
            pairs.append([normalized, label])
    return pairs
|
||||
|
||||
class ExportedGoogleContactVCARDRawNumbers(TypedDict):
    """One contact read from a vCard file, before its numbers are normalized."""

    # Value of the card's ``fn`` field.
    full_name: str
    # Values of the card's ``tel`` entries, exactly as read.
    numbers: List[str]
|
||||
|
||||
def normalize_number(number: str, default_country_calling_code: str):
    """Reduce a phone number to digits only, including its country calling code.

    Every character other than an ASCII digit or ``+`` is discarded, so
    separators such as spaces, dots, dashes, parentheses or a ``tel:`` prefix
    do not end up in the result.

    Args:
        number: Phone number as written in the contact.
        default_country_calling_code: Calling code prepended when ``number``
            does not carry one; a leading ``+`` on it is ignored.

    Returns:
        The digits of the number, starting with the country calling code.
    """
    cleaned = "".join(ch for ch in number if ch in "+0123456789")

    # A number that starts with "+" or "00" already has a country calling code.
    if cleaned.startswith('+'):
        return cleaned.replace('+', '')

    cleaned = cleaned.replace('+', '')
    if cleaned.startswith('00'):
        return cleaned[2:]

    # National format: drop the single leading (trunk) zero before prefixing.
    if cleaned.startswith('0'):
        cleaned = cleaned[1:]

    return default_country_calling_code.lstrip('+') + cleaned
|
||||
22
Whatsapp_Chat_Exporter/contacts_names_from_vcards_test.py
Normal file
22
Whatsapp_Chat_Exporter/contacts_names_from_vcards_test.py
Normal file
@@ -0,0 +1,22 @@
|
||||
# from contacts_names_from_vcards import readVCardsFile
|
||||
|
||||
from Whatsapp_Chat_Exporter.contacts_names_from_vcards import normalize_number, readVCardsFile
|
||||
|
||||
|
||||
def test_readVCardsFile():
    """Reading ``contacts.vcf`` with calling code 973 yields at least one entry."""
    entries = readVCardsFile("contacts.vcf", "973")

    assert len(entries) > 0
|
||||
|
||||
def test_createNumberToNameDicts():
    """Placeholder: no assertions have been written for this test yet."""
|
||||
|
||||
def test_fuzzy_match_numbers():
    """Placeholder: no assertions have been written for this test yet."""
|
||||
|
||||
def test_normalize_number():
    """Check normalize_number against (raw number, default code, expected) cases."""
    cases = [
        ('0531234567', '1', '1531234567'),
        ('001531234567', '2', '1531234567'),
        ('+1531234567', '34', '1531234567'),
        ('053(123)4567', '34', '34531234567'),
        ('0531-234-567', '58', '58531234567'),
    ]
    for raw, default_code, expected in cases:
        assert normalize_number(raw, default_code) == expected
|
||||
@@ -344,3 +344,10 @@ class JidType(IntEnum):
|
||||
GROUP = 1
|
||||
SYSTEM_BROADCAST = 5
|
||||
STATUS = 11
|
||||
|
||||
def _is_message_empty(message):
    """Return True when ``message`` has no text (None or "") and no media."""
    has_text = message.data is not None and message.data != ""
    if has_text:
        return False
    return not message.media
|
||||
|
||||
def is_chat_empty(chat: ChatStore):
    """Return True when ``chat`` has no messages or every message is empty."""
    if len(chat.messages) == 0:
        return True
    for message in chat.messages.values():
        if not _is_message_empty(message):
            return False
    return True
|
||||
|
||||
7
setup.py
7
setup.py
@@ -55,9 +55,10 @@ setuptools.setup(
|
||||
'crypt12': ["pycryptodome"],
|
||||
'crypt14': ["pycryptodome"],
|
||||
'crypt15': ["pycryptodome", "javaobj-py3"],
|
||||
'all': ["pycryptodome", "javaobj-py3"],
|
||||
'everything': ["pycryptodome", "javaobj-py3"],
|
||||
'backup': ["pycryptodome", "javaobj-py3"]
|
||||
'all': ["pycryptodome", "javaobj-py3", "vobject"],
|
||||
'everything': ["pycryptodome", "javaobj-py3", "vobject"],
|
||||
'backup': ["pycryptodome", "javaobj-py3"],
|
||||
'vcards': ["vobject", "pycryptodome", "javaobj-py3"],
|
||||
},
|
||||
entry_points={
|
||||
"console_scripts": [
|
||||
|
||||
Reference in New Issue
Block a user