Add support for telegram JSON file format

Add the --telegram command line argument which, when combined with JSON
output, generates a Telegram-compatible JSON file [1].

The JSON is per-chat, so the --telegram argument implies the
--json-per-chat setting.

I took a few shortcuts:
* Contacts and IDs are inferred from the chat ID or phone numbers
* All text is marked as plain (e.g. no markup or other text types)
* Only personal chats and private groups are supported
* A chat is treated as a private group if it has a name
* Various IDs try to match the ones in WhatsApp but may require bulk edits

[1] - https://core.telegram.org/import-export

Fixes: https://github.com/KnugiHK/WhatsApp-Chat-Exporter/issues/152
This commit is contained in:
glemco
2025-06-16 11:45:58 +02:00
parent 99213503c4
commit 5ed260b0b7
2 changed files with 77 additions and 2 deletions

View File

@@ -627,6 +627,72 @@ def safe_name(text: Union[str, bytes]) -> str:
return "-".join(''.join(safe_chars).split())
def get_from_string(msg: Dict, chat_id: str) -> str:
    """Return the display label (number or name) of the message sender.

    Messages flagged ``from_me`` are labelled ``"Me"``. Otherwise the
    message's ``sender`` entry is used, falling back to ``chat_id`` when
    that entry is empty. The result is always converted to ``str``.
    """
    if msg["from_me"]:
        return "Me"
    # An empty/None sender means we only know the chat the message came from.
    label = msg["sender"] or chat_id
    return str(label)
def get_from_id(msg: Dict, chat_id: str) -> str:
    """Return the Telegram-style user id string for the message sender.

    Args:
        msg: Message dict; its ``from_me`` and ``sender`` entries are read.
        chat_id: Identifier of the chat, used when the message has no sender.

    Returns:
        ``"user00000"`` for messages flagged ``from_me``, otherwise
        ``"user"`` followed by the sender, or by ``chat_id`` when the
        sender entry is empty.
    """
    if msg["from_me"]:
        return "user00000"
    sender = msg["sender"]
    if sender:
        # Format instead of concatenating so a non-str sender (e.g. a numeric
        # phone number) does not raise TypeError; get_from_string() already
        # converts the same field with str().
        return f"user{sender}"
    return f"user{chat_id}"
def get_reply_id(data: Dict, reply_key: str) -> Optional[int]:
    """Return the id of the message that ``reply_key`` refers to.

    Args:
        data: Chat dict whose ``messages`` entry maps message ids to message
            dicts carrying a ``key_id`` entry.
        reply_key: The ``key_id`` of the message being replied to; an empty
            or ``None`` value means the message is not a reply.

    Returns:
        The id (as ``int``) of the first message whose ``key_id`` equals
        ``reply_key``, or ``None`` when there is no reply key or no message
        in this chat matches it.
    """
    if not reply_key:
        return None
    # The generator stops at the first match, so only that id is converted.
    return next(
        (
            int(msg_id)
            for msg_id, msg in data["messages"].items()
            if msg["key_id"] == reply_key
        ),
        None,
    )
def telegram_json_format(jik: str, data: Dict) -> Dict:
    """Convert one chat to the Telegram export format.

    Args:
        jik: Chat identifier. Its digit characters form the numeric chat id
            and the whole string is the fallback chat name.
        data: Chat dict with a ``name`` entry and a ``messages`` mapping of
            message id -> message dict. For each message the ``data``,
            ``timestamp``, ``from_me``, ``sender``, ``key_id`` and ``reply``
            entries are read.

    Returns:
        A dict with ``name``, ``type``, ``id`` and ``messages`` keys.
        Messages with empty text are omitted, and ``reply_to_message_id`` is
        only present when the replied-to message was found in this chat.
    """
    try:
        chat_id = int("".join(c for c in jik if c.isdigit()))
    except ValueError:
        # not a real chat: e.g. statusbroadcast
        chat_id = 0

    messages = data["messages"]

    # Index key_id -> message id once instead of rescanning every message for
    # each reply. setdefault keeps the first message seen for a given key_id,
    # which matches a first-match linear search.
    reply_index: Dict = {}
    for msg_id, msg in messages.items():
        reply_index.setdefault(msg["key_id"], int(msg_id))

    converted = []
    for msg_id, msg in messages.items():
        text = msg["data"]
        if not text:
            # empty messages are not exported
            continue
        timestamp = msg["timestamp"]
        entry = {
            "id": int(msg_id),
            "type": "message",
            "date": datetime.fromtimestamp(timestamp).isoformat(timespec="seconds"),
            "date_unixtime": int(timestamp),
            "from": get_from_string(msg, chat_id),
            "from_id": get_from_id(msg, chat_id),
        }
        reply_key = msg["reply"]
        reply_id = reply_index.get(reply_key) if reply_key else None
        # Compare against None so a reply to message id 0 is not dropped.
        if reply_id is not None:
            entry["reply_to_message_id"] = reply_id
        entry["text"] = text
        entry["text_entities"] = [
            {
                # TODO this will lose formatting and different types
                "type": "plain",
                "text": text,
            }
        ]
        converted.append(entry)

    return {
        "name": data["name"] if data["name"] else jik,
        # TODO can we do better than this?
        "type": "private_group" if data["name"] else "personal_chat",
        "id": chat_id,
        "messages": converted,
    }
class WhatsAppIdentifier(StrEnum):
# AppDomainGroup-group.net.whatsapp.WhatsApp.shared-ChatStorage.sqlite
MESSAGE = "7c7fba66680ef796b916b067077cc246adacf01d"