From 5ed260b0b743778a2eaff35a47d72e2d7365e18c Mon Sep 17 00:00:00 2001
From: glemco <32201227+glemco@users.noreply.github.com>
Date: Mon, 16 Jun 2025 11:45:58 +0200
Subject: [PATCH] Add support for Telegram JSON file format

Add the --telegram command line argument that, combined with a JSON
output, generates a Telegram-compatible JSON file [1].
The JSON is per-chat, so the --telegram argument implies the
--per-chat setting.

I took a few shortcuts:
* Contacts and IDs are inferred from the chat ID or phone numbers
* All text is marked as plain (i.e. no markup or other entity types)
* Only personal chats and private groups are supported
* A chat is treated as a private group if it has a name
* Various IDs try to match the ones in WA but may require bulk edits

[1] - https://core.telegram.org/import-export

Fixes: https://github.com/KnugiHK/WhatsApp-Chat-Exporter/issues/152
---
 Whatsapp_Chat_Exporter/__main__.py |  13 +++-
 Whatsapp_Chat_Exporter/utility.py  | 124 +++++++++++++++++++++++++++++
 2 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/Whatsapp_Chat_Exporter/__main__.py b/Whatsapp_Chat_Exporter/__main__.py
index f561bcb..05a84f9 100644
--- a/Whatsapp_Chat_Exporter/__main__.py
+++ b/Whatsapp_Chat_Exporter/__main__.py
@@ -15,6 +15,7 @@ from Whatsapp_Chat_Exporter.data_model import ChatCollection, ChatStore
 from Whatsapp_Chat_Exporter.utility import APPLE_TIME, CLEAR_LINE, Crypt, check_update
 from Whatsapp_Chat_Exporter.utility import readable_to_bytes, safe_name, bytes_to_readable
 from Whatsapp_Chat_Exporter.utility import import_from_json, incremental_merge, DbType
+from Whatsapp_Chat_Exporter.utility import telegram_json_format
 from argparse import ArgumentParser, SUPPRESS
 from datetime import datetime
 from getpass import getpass
@@ -148,6 +149,10 @@ def setup_argument_parser() -> ArgumentParser:
         '--pretty-print-json', dest='pretty_print_json', default=None, nargs='?', const=2, type=int,
         help="Pretty print the output JSON."
     )
+    json_group.add_argument(
+        "--telegram", dest="telegram", default=False, action='store_true',
+        help="Output the JSON in a format compatible with Telegram export (implies --per-chat)"
+    )
     json_group.add_argument(
         "--per-chat", dest="json_per_chat", default=False, action='store_true',
         help="Output the JSON file per chat"
@@ -648,7 +653,7 @@ def export_json(args, data: ChatCollection, contact_store=None) -> None:
         data = {jik: chat.to_json() for jik, chat in data.items()}
 
     # Export as a single file or per chat
-    if not args.json_per_chat:
+    if not args.json_per_chat and not args.telegram:
         export_single_json(args, data)
     else:
         export_multiple_json(args, data)
@@ -684,9 +689,13 @@ def export_multiple_json(args, data: Dict) -> None:
         else:
             contact = jik.replace('+', '')
 
+        if args.telegram:
+            obj = telegram_json_format(jik, data[jik])
+        else:
+            obj = {jik: data[jik]}
         with open(f"{json_path}/{safe_name(contact)}.json", "w") as f:
             file_content = json.dumps(
-                {jik: data[jik]},
+                obj,
                 ensure_ascii=not args.avoid_encoding_json,
                 indent=args.pretty_print_json
             )
diff --git a/Whatsapp_Chat_Exporter/utility.py b/Whatsapp_Chat_Exporter/utility.py
index a39af16..f53b88a 100644
--- a/Whatsapp_Chat_Exporter/utility.py
+++ b/Whatsapp_Chat_Exporter/utility.py
@@ -627,6 +627,130 @@ def safe_name(text: Union[str, bytes]) -> str:
     return "-".join(''.join(safe_chars).split())
 
 
+def get_from_string(msg: Dict, chat_id: Union[int, str]) -> str:
+    """Return the display name of the message sender.
+
+    Args:
+        msg: A single message dict of the chat.
+        chat_id: Identifier of the chat, used when the message carries
+            no explicit sender.
+
+    Returns:
+        ``"Me"`` for own messages, otherwise the sender or, failing
+        that, the chat id as a string.
+    """
+    if msg["from_me"]:
+        return "Me"
+    if msg["sender"]:
+        return str(msg["sender"])
+    return str(chat_id)
+
+
+def get_from_id(msg: Dict, chat_id: Union[int, str]) -> str:
+    """Return the Telegram-style user id (``user<N>``) of the sender.
+
+    Args:
+        msg: A single message dict of the chat.
+        chat_id: Identifier of the chat, used when the message carries
+            no explicit sender.
+
+    Returns:
+        ``"user00000"`` for own messages, otherwise ``"user"`` followed
+        by the sender or, failing that, the chat id.
+    """
+    if msg["from_me"]:
+        return "user00000"
+    if msg["sender"]:
+        # f-string rather than "+" so a non-string sender cannot raise
+        return f"user{msg['sender']}"
+    return f"user{chat_id}"
+
+
+def get_reply_id(data: Dict, reply_key: Optional[str]) -> Optional[int]:
+    """Return the id of the message whose ``key_id`` equals *reply_key*.
+
+    Args:
+        data: The chat, holding a ``"messages"`` mapping of id to message.
+        reply_key: The ``key_id`` of the quoted message, or a falsy value
+            when the message is not a reply.
+
+    Returns:
+        The numeric id of the first matching message, or ``None`` when
+        there is no reply key or no message matches it.
+    """
+    if not reply_key:
+        return None
+    for msg_id, msg in data["messages"].items():
+        if msg["key_id"] == reply_key:
+            return int(msg_id)
+    return None
+
+
+def telegram_json_format(jik: str, data: Dict) -> Dict:
+    """Convert one chat to the Telegram export JSON layout.
+
+    Messages without text are left out, and ``reply_to_message_id`` is only
+    emitted for replies whose quoted message is part of the chat.
+
+    Args:
+        jik: WhatsApp identifier of the chat; its digits become the chat id.
+        data: The chat as a dict with ``"name"`` and ``"messages"`` entries.
+
+    Returns:
+        A dict with the ``name``, ``type``, ``id`` and ``messages`` keys of
+        a Telegram chat export.
+    """
+    try:
+        chat_id = int(''.join(c for c in jik if c.isdigit()))
+    except ValueError:
+        # not a real chat: e.g. statusbroadcast
+        chat_id = 0
+
+    # Index key_id -> message id once, so that resolving a reply is a dict
+    # lookup instead of a scan over the whole chat for every message.
+    key_to_id: Dict[str, int] = {}
+    for msg_id, msg in data["messages"].items():
+        key = msg.get("key_id")
+        if key:
+            # setdefault keeps the first match, like get_reply_id() does
+            key_to_id.setdefault(key, int(msg_id))
+
+    messages = []
+    for msg_id, msg in data["messages"].items():
+        text = msg["data"]
+        if not text:
+            # remove empty messages
+            continue
+        entry = {
+            "id": int(msg_id),
+            "type": "message",
+            "date": datetime.fromtimestamp(msg["timestamp"]).isoformat(timespec="seconds"),
+            "date_unixtime": int(msg["timestamp"]),
+            "from": get_from_string(msg, chat_id),
+            "from_id": get_from_id(msg, chat_id),
+        }
+        reply_id = key_to_id.get(msg["reply"]) if msg["reply"] else None
+        if reply_id is not None:
+            entry["reply_to_message_id"] = reply_id
+        entry["text"] = text
+        entry["text_entities"] = [
+            {
+                # TODO this will lose formatting and different types
+                "type": "plain",
+                "text": text,
+            }
+        ]
+        messages.append(entry)
+
+    return {
+        "name": data["name"] if data["name"] else jik,
+        # TODO can we do better than this?
+        "type": "private_group" if data["name"] else "personal_chat",
+        "id": chat_id,
+        "messages": messages,
+    }
+
+
 class WhatsAppIdentifier(StrEnum):
     # AppDomainGroup-group.net.whatsapp.WhatsApp.shared-ChatStorage.sqlite
     MESSAGE = "7c7fba66680ef796b916b067077cc246adacf01d"