mirror of
https://github.com/KnugiHK/WhatsApp-Chat-Exporter.git
synced 2026-01-29 05:40:42 +00:00
Create a script to process Brazilian numbers in vcards #127
This commit is contained in:
100
scripts/brazilian-number-processing.py
Normal file
100
scripts/brazilian-number-processing.py
Normal file
@@ -0,0 +1,100 @@
|
||||
import re
|
||||
import argparse
|
||||
|
||||
def process_phone_number(raw_phone):
    """
    Format a raw Brazilian phone string taken from a VCARD.

    Returns a tuple ``(original_formatted, modified_formatted)``:
      - ``original_formatted``: the number as "+55 {area} {subscriber}", with the
        subscriber split 5-4 (9-digit subscriber) or 4-4 (8-digit subscriber).
      - ``modified_formatted``: for a 9-digit subscriber, the same number with the
        extra leading (ninth) digit removed and split 4-4; ``None`` for an
        8-digit subscriber.

    Example: an input representing "027912345678" yields
        "+55 27 91234-5678" and "+55 27 1234-5678"

    Cleaning steps, in order:
      1. A leading "+55" is dropped.
      2. Every non-digit character is removed.
      3. One leading trunk zero is dropped.
      4. Leftover prefix digits are discarded: exactly 12 digits are treated as
         a 2-digit prefix (e.g. "55" written without "+") followed by a 10-digit
         number, so the last 10 digits are kept; 13 or more digits keep the
         last 11.

    If the cleaned number does not end up with 10 digits (2 area + 8 subscriber)
    or 11 digits (2 area + 9 subscriber), ``(None, None)`` is returned.
    """
    # If the number starts with '+55', remove it for processing.
    number_to_process = raw_phone.strip()
    if number_to_process.startswith("+55"):
        number_to_process = number_to_process[3:]

    # Remove all non-digit characters (this also discards any whitespace).
    digits = re.sub(r'\D', '', number_to_process)

    # Remove trunk zero if present.
    if digits.startswith("0"):
        digits = digits[1:]

    # Strip leftover prefix digits. The 12-digit case must be tested before the
    # general "too long" case; otherwise it can never be reached and a number
    # like "55 27 1234-5678" would be cut to 11 digits, shifting the area code.
    if len(digits) == 12:
        # Two prefix digits + area + 8-digit subscriber: keep the last 10.
        digits = digits[-10:]
    elif len(digits) > 12:
        # Assume the valid number is the last 11 digits (9-digit subscriber).
        digits = digits[-11:]

    if len(digits) not in (10, 11):
        return None, None

    area = digits[:2]
    subscriber = digits[2:]

    if len(subscriber) == 9:
        # Original number uses a 5-4 split, e.g. "91234-5678".
        orig_subscriber = f"{subscriber[:5]}-{subscriber[5:]}"
        # Modified number drops the first subscriber digit, leaving a 4-4 split.
        mod_subscriber = f"{subscriber[1:5]}-{subscriber[5:]}"
        return f"+55 {area} {orig_subscriber}", f"+55 {area} {mod_subscriber}"

    # Only an 8-digit subscriber can reach this point: there is no ninth digit
    # to remove, so no modified variant is produced.
    return f"+55 {area} {subscriber[:4]}-{subscriber[4:]}", None
|
||||
|
||||
def process_vcard(input_vcard, output_vcard):
    """
    Rewrite the telephone lines of a VCARD file and save the result.

    Every line of ``input_vcard`` is copied to ``output_vcard`` unchanged unless
    it is a telephone line ("TEL:", "TEL;TYPE=...:" or a prefixed form such as
    "item1.TEL:"). For a telephone line, the number is passed to
    ``process_phone_number``:
      - if a formatted number comes back, the line is replaced by
        "TEL;TYPE=CELL:{formatted}"; otherwise the original line is kept;
      - if a modified number also comes back, an extra
        "TEL;TYPE=CELL:{modified}" line is written right after it.

    Both files are opened as UTF-8 text.
    """
    # Matches lines starting with "TEL:" or "TEL;TYPE=..." or with prefixes like "item1.TEL:".
    tel_line = re.compile(r'^(?P<prefix>(?:TEL(?:;TYPE=[^:]+)?|(?:.*\.)?TEL)):(?P<number>.*)$')

    # The whole input is read before anything is written.
    with open(input_vcard, 'r', encoding='utf-8') as source:
        source_lines = source.readlines()

    rewritten = []
    for entry in source_lines:
        found = tel_line.match(entry.rstrip("\n"))
        if not found:
            # Not a telephone line: pass it through untouched.
            rewritten.append(entry)
            continue

        primary, alternate = process_phone_number(found.group("number").strip())
        # Always output using the standardized prefix when formatting succeeded.
        rewritten.append(f"TEL;TYPE=CELL:{primary}\n" if primary else entry)
        if alternate:
            rewritten.append(f"TEL;TYPE=CELL:{alternate}\n")

    with open(output_vcard, 'w', encoding='utf-8') as target:
        target.writelines(rewritten)
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: two positional paths, input then output.
    cli = argparse.ArgumentParser(
        description=(
            "Process a VCARD file to standardize telephone entries and add a "
            "second TEL line with the modified number (removing the extra "
            "ninth digit) for contacts with 9-digit subscribers."
        )
    )
    for arg_name, arg_help in (
        ('input_vcard', 'Input VCARD file'),
        ('output_vcard', 'Output VCARD file'),
    ):
        cli.add_argument(arg_name, type=str, help=arg_help)
    options = cli.parse_args()

    process_vcard(options.input_vcard, options.output_vcard)
    print(f"VCARD processed and saved to {options.output_vcard}")
|
||||
Reference in New Issue
Block a user