mirror of
https://github.com/KnugiHK/WhatsApp-Chat-Exporter.git
synced 2026-01-29 13:50:42 +00:00
Compare commits
344 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bde3c18498 | ||
|
|
af3307825a | ||
|
|
9b34f7ea6d | ||
|
|
280a1186d8 | ||
|
|
30cff71e76 | ||
|
|
05d21e3e5a | ||
|
|
fb88c83ac4 | ||
|
|
ffb6aef96e | ||
|
|
77c5a3e20c | ||
|
|
7b0965ac1d | ||
|
|
d72b41da11 | ||
|
|
fed14ceb29 | ||
|
|
3e6fdaa126 | ||
|
|
04000c78e2 | ||
|
|
75c429fe22 | ||
|
|
9608fa387d | ||
|
|
fc9c76c34c | ||
|
|
87b1fcc038 | ||
|
|
fe88f1b837 | ||
|
|
af3d31f773 | ||
|
|
df67a549c0 | ||
|
|
884ccc4cc0 | ||
|
|
484910cf5c | ||
|
|
a83c8eb17f | ||
|
|
8ffa8cfcac | ||
|
|
8fcd50d21b | ||
|
|
f91c527676 | ||
|
|
f35bf24a5e | ||
|
|
e2684845b8 | ||
|
|
df3333f948 | ||
|
|
bd4ccbb8ac | ||
|
|
fb5a1c3e1f | ||
|
|
1760dea0f5 | ||
|
|
4fcb4df0a4 | ||
|
|
13904ea4d8 | ||
|
|
8069882dc5 | ||
|
|
d95b075ac0 | ||
|
|
ea01a727cf | ||
|
|
b2f679d975 | ||
|
|
0cf113561a | ||
|
|
80bdc4414a | ||
|
|
09e5e1a756 | ||
|
|
6e37061e71 | ||
|
|
b301dd22d0 | ||
|
|
5b97d6013a | ||
|
|
8f304f1c48 | ||
|
|
7bb2fb2420 | ||
|
|
83fefe585b | ||
|
|
4886587065 | ||
|
|
0423fdabda | ||
|
|
823ed663e7 | ||
|
|
be469aed93 | ||
|
|
b34045a59f | ||
|
|
3461ce3735 | ||
|
|
b0942d695b | ||
|
|
5449646a1b | ||
|
|
6370b81299 | ||
|
|
c69d053049 | ||
|
|
b01d81ddec | ||
|
|
7e2800d89a | ||
|
|
33763b5f41 | ||
|
|
f080e2d4ea | ||
|
|
00f666a3c0 | ||
|
|
2ca064d111 | ||
|
|
3b54ca9d28 | ||
|
|
03312da6ee | ||
|
|
c7e8a603c7 | ||
|
|
574b0393d8 | ||
|
|
baa79a7b74 | ||
|
|
d57ff29e71 | ||
|
|
2d4d934a91 | ||
|
|
9741cab078 | ||
|
|
1e7687f8e8 | ||
|
|
524b3a4034 | ||
|
|
1ab4b24fa0 | ||
|
|
8d003b217c | ||
|
|
d754e6c279 | ||
|
|
0eebbcff21 | ||
|
|
a569fb0875 | ||
|
|
6e8e0d7f59 | ||
|
|
c0a511adb3 | ||
|
|
e84640de1c | ||
|
|
20199ed794 | ||
|
|
f4e610a953 | ||
|
|
99a3a4bcd0 | ||
|
|
dedfce8feb | ||
|
|
54e0b43888 | ||
|
|
d5ea843286 | ||
|
|
b01fe0ab4a | ||
|
|
a7ccc3be66 | ||
|
|
07b1cf6a8a | ||
|
|
2b49ac2e41 | ||
|
|
2466e2542a | ||
|
|
af70f6f6f9 | ||
|
|
48c3fa965f | ||
|
|
472c18448c | ||
|
|
810d8c7c8b | ||
|
|
f80be81ee6 | ||
|
|
0fcaa946e6 | ||
|
|
1e7953e5fe | ||
|
|
481656fdeb | ||
|
|
3d155fb48f | ||
|
|
f659a8c171 | ||
|
|
3ffb63ed28 | ||
|
|
94956913e8 | ||
|
|
7b5a7419f1 | ||
|
|
d5cef051d3 | ||
|
|
f81f31d667 | ||
|
|
8c617b721f | ||
|
|
0d626519ec | ||
|
|
f39d448aa6 | ||
|
|
2dc433df7c | ||
|
|
75a8a2e8c5 | ||
|
|
3847836ed6 | ||
|
|
c27f5ee41c | ||
|
|
e6c43e7e35 | ||
|
|
c2fa18778f | ||
|
|
150180fdff | ||
|
|
86ea938323 | ||
|
|
7da71e84fe | ||
|
|
efd5ed80b2 | ||
|
|
efea1d6165 | ||
|
|
3082c83bc4 | ||
|
|
fc50415afd | ||
|
|
be4adadbd8 | ||
|
|
8eb05424fd | ||
|
|
380289d1c1 | ||
|
|
91ff882d15 | ||
|
|
5aad65fff7 | ||
|
|
decea88028 | ||
|
|
a08f44e6ed | ||
|
|
dbd1802dd6 | ||
|
|
b9f123fbea | ||
|
|
2944d00ca2 | ||
|
|
448ba892cc | ||
|
|
a5cb46e095 | ||
|
|
ee4e95c75f | ||
|
|
f488894942 | ||
|
|
269a59c1e2 | ||
|
|
d8b434e169 | ||
|
|
326b99d860 | ||
|
|
bd2f063cc0 | ||
|
|
736292538b | ||
|
|
d772efe779 | ||
|
|
282c99c7dd | ||
|
|
8dec2a7e97 | ||
|
|
f9dedc7930 | ||
|
|
e2f497dbb6 | ||
|
|
989bddca37 | ||
|
|
40d060628f | ||
|
|
032af6cdcf | ||
|
|
e243abe2a4 | ||
|
|
b8f0af5f31 | ||
|
|
030fef53e1 | ||
|
|
3ed269e17f | ||
|
|
1e3ee5e322 | ||
|
|
6c740e69a5 | ||
|
|
828c8a1a72 | ||
|
|
1fb8588752 | ||
|
|
6636210e4c | ||
|
|
cc0105647a | ||
|
|
4fa360a389 | ||
|
|
e5228855d2 | ||
|
|
db1cdf8189 | ||
|
|
08ce61e68e | ||
|
|
138dd5351f | ||
|
|
136152dc18 | ||
|
|
55bc62cdc1 | ||
|
|
d430c7bfba | ||
|
|
672b85474e | ||
|
|
525d88f2c6 | ||
|
|
b57087794a | ||
|
|
be316ebb89 | ||
|
|
1078f7e5f7 | ||
|
|
ba2a88067a | ||
|
|
af53ba978b | ||
|
|
d55a42a549 | ||
|
|
506f8e89f4 | ||
|
|
fff11b26a5 | ||
|
|
fa66ef3a52 | ||
|
|
0b93ae567e | ||
|
|
c62e07cb0a | ||
|
|
317d785d50 | ||
|
|
38c1e47be9 | ||
|
|
ff95625edf | ||
|
|
14854193bd | ||
|
|
67c1b43669 | ||
|
|
23046e01ba | ||
|
|
c366e656af | ||
|
|
41f45fb07c | ||
|
|
be9e790b12 | ||
|
|
bfdc68cd6a | ||
|
|
594a04adbc | ||
|
|
20b2eec047 | ||
|
|
011c8ff1e7 | ||
|
|
e4c47ea41f | ||
|
|
c344f05b05 | ||
|
|
88ef4989fc | ||
|
|
f7f6b01c86 | ||
|
|
a49a911e03 | ||
|
|
3443143744 | ||
|
|
5f6b764bb9 | ||
|
|
3940b2991f | ||
|
|
dc1df8a03e | ||
|
|
dd5ec2219c | ||
|
|
e0c2cf5f66 | ||
|
|
8cdb694a16 | ||
|
|
8294f06587 | ||
|
|
200dea218f | ||
|
|
df93033c6c | ||
|
|
8f90733da2 | ||
|
|
3fdf6d0818 | ||
|
|
2fa5c4268e | ||
|
|
ed658d78dc | ||
|
|
0280325b4a | ||
|
|
a42ec5d762 | ||
|
|
c419dd5d39 | ||
|
|
8d036a6d87 | ||
|
|
42435c38cc | ||
|
|
32caab7c40 | ||
|
|
0897dc2897 | ||
|
|
f63b180500 | ||
|
|
dbdfdaedcf | ||
|
|
0e802f4554 | ||
|
|
41dd5e545f | ||
|
|
8750315e8e | ||
|
|
e9499c3bb7 | ||
|
|
80c3ed11f6 | ||
|
|
7c78cce221 | ||
|
|
32a312d332 | ||
|
|
328f34e632 | ||
|
|
9ac8839ecc | ||
|
|
e7113d72d7 | ||
|
|
0a0ae8cf15 | ||
|
|
b1d8d173a2 | ||
|
|
3bd6f288ea | ||
|
|
bf06795962 | ||
|
|
20d8e1384a | ||
|
|
6fd0e61b64 | ||
|
|
bbb47cd839 | ||
|
|
c155064ae1 | ||
|
|
d4efd919f9 | ||
|
|
13d761286e | ||
|
|
a943808734 | ||
|
|
0495970c38 | ||
|
|
32b14dc392 | ||
|
|
6bea8d07f4 | ||
|
|
69fdb61bae | ||
|
|
e1f160fc7c | ||
|
|
3b7e02ba31 | ||
|
|
bdb7d80831 | ||
|
|
ea7e019adc | ||
|
|
c7a01bb9c0 | ||
|
|
7c0b90d458 | ||
|
|
84383e1d9d | ||
|
|
06a1d34567 | ||
|
|
b371587d65 | ||
|
|
3e7d7916a7 | ||
|
|
17997e840f | ||
|
|
640acb3f86 | ||
|
|
cdfaf69f7a | ||
|
|
ee5f8b82be | ||
|
|
b9fa36acb4 | ||
|
|
fb88124d21 | ||
|
|
b5effbd512 | ||
|
|
430a5eccb8 | ||
|
|
8f0511a6e2 | ||
|
|
8380487e44 | ||
|
|
45666d8878 | ||
|
|
2ba55719f1 | ||
|
|
2d23052758 | ||
|
|
10875060c9 | ||
|
|
0bb99d59e0 | ||
|
|
9178e5326b | ||
|
|
26320413e8 | ||
|
|
a275a0f40c | ||
|
|
4cb4ac3e7b | ||
|
|
4139cab00f | ||
|
|
c1964bc2cd | ||
|
|
dab0493354 | ||
|
|
e0c464c8d8 | ||
|
|
92d339d1c0 | ||
|
|
4d6c80b561 | ||
|
|
d46a42a097 | ||
|
|
7cd259143a | ||
|
|
726812a5f7 | ||
|
|
6fddc1c23a | ||
|
|
77ceaa25dd | ||
|
|
e09f18e2f2 | ||
|
|
23114572bd | ||
|
|
2f04b69f38 | ||
|
|
e7c246822b | ||
|
|
2a215d024f | ||
|
|
f267f53007 | ||
|
|
3a30dfc800 | ||
|
|
9600da59ae | ||
|
|
26b58843fb | ||
|
|
60575c7989 | ||
|
|
14b1cb7fde | ||
|
|
92b8903521 | ||
|
|
d3892a4e4f | ||
|
|
b37c13434e | ||
|
|
4b357d5ea9 | ||
|
|
6407ba2136 | ||
|
|
f87108dadc | ||
|
|
6ca7e81484 | ||
|
|
41d3659269 | ||
|
|
580eaddb24 | ||
|
|
77b4b784d3 | ||
|
|
d9a77e0eec | ||
|
|
876729eb81 | ||
|
|
48f667d02b | ||
|
|
422ab2f784 | ||
|
|
996ee65525 | ||
|
|
042f6f9024 | ||
|
|
507e88d9c3 | ||
|
|
60e1e7d3eb | ||
|
|
774fb6d781 | ||
|
|
3ef3b02230 | ||
|
|
07cc0f3571 | ||
|
|
a1319eb835 | ||
|
|
8cbb0af43a | ||
|
|
28c4a7b99f | ||
|
|
e4c9d42927 | ||
|
|
c274b6b1c0 | ||
|
|
eec739d7cf | ||
|
|
3d7dca0682 | ||
|
|
24f7837171 | ||
|
|
15201acbe6 | ||
|
|
6fd290efd8 | ||
|
|
691bfe31c8 | ||
|
|
64eb2bcb9d | ||
|
|
1bc4a8c5b9 | ||
|
|
8a621827ff | ||
|
|
227f438404 | ||
|
|
3e71817778 | ||
|
|
08c5979eed | ||
|
|
0e6319eb4e | ||
|
|
734bb78cd8 | ||
|
|
a522eb2034 | ||
|
|
9fe6a0d2a8 | ||
|
|
c73eabe2a4 | ||
|
|
1faf111e64 | ||
|
|
9140c07feb |
36
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
36
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
---
|
||||
name: Bug report
|
||||
about: Create a report to help us improve
|
||||
title: "[BUG]"
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
# Must have
|
||||
- WhatsApp version: [WhatsApp version]
|
||||
- OS: [Android/iOS] - [version]
|
||||
- Platform: [Linux/Windows/MacOS]
|
||||
- Branch and version: [main/dev] - [exporter version]
|
||||
|
||||
If it is an error raised by Python, please also provide the traceback
|
||||
```
|
||||
[traceback here]
|
||||
```
|
||||
|
||||
# Nice to have
|
||||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
|
||||
**To Reproduce**
|
||||
Steps to reproduce the behavior:
|
||||
1. Go to '...'
|
||||
2. Click on '....'
|
||||
3. Scroll down to '....'
|
||||
4. See error
|
||||
|
||||
**Screenshots**
|
||||
If applicable, add screenshots to help explain your problem.
|
||||
|
||||
**Additional context**
|
||||
Add any other context about the problem here.
|
||||
17
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
17
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
---
|
||||
name: Feature request
|
||||
about: Suggest an idea for this project
|
||||
title: "[FEATURE]"
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Is your feature request related to a problem? Please describe.**
|
||||
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
|
||||
|
||||
**Describe the solution you'd like**
|
||||
A clear and concise description of what you want to happen.
|
||||
|
||||
**Additional context**
|
||||
Add any other context or screenshots about the feature request here.
|
||||
82
.github/workflows/compile-binary.yml
vendored
Normal file
82
.github/workflows/compile-binary.yml
vendored
Normal file
@@ -0,0 +1,82 @@
|
||||
name: Compile standalone binary
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
linux:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka==2.3
|
||||
pip install .
|
||||
- name: Build binary with Nuitka
|
||||
run: |
|
||||
python -m nuitka --no-deployment-flag=self-execution --onefile --include-data-file=./Whatsapp_Chat_Exporter/whatsapp.html=./Whatsapp_Chat_Exporter/whatsapp.html --follow-imports Whatsapp_Chat_Exporter/__main__.py
|
||||
cp __main__.bin wtsexporter_linux_x64
|
||||
sha256sum wtsexporter_linux_x64
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: binary-linux
|
||||
path: |
|
||||
./wtsexporter_linux_x64
|
||||
|
||||
windows:
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka==2.3
|
||||
pip install .
|
||||
- name: Build binary with Nuitka
|
||||
run: |
|
||||
python -m nuitka --no-deployment-flag=self-execution --onefile --include-data-file=./Whatsapp_Chat_Exporter/whatsapp.html=./Whatsapp_Chat_Exporter/whatsapp.html --assume-yes-for-downloads --follow-imports Whatsapp_Chat_Exporter\__main__.py
|
||||
copy __main__.exe wtsexporter_x64.exe
|
||||
Get-FileHash wtsexporter_x64.exe
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: binary-windows
|
||||
path: |
|
||||
.\wtsexporter_x64.exe
|
||||
|
||||
macos:
|
||||
runs-on: macos-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pycryptodome javaobj-py3 ordered-set zstandard nuitka==2.3
|
||||
pip install .
|
||||
- name: Build binary with Nuitka
|
||||
run: |
|
||||
python -m nuitka --no-deployment-flag=self-execution --onefile --include-data-file=./Whatsapp_Chat_Exporter/whatsapp.html=./Whatsapp_Chat_Exporter/whatsapp.html --follow-imports Whatsapp_Chat_Exporter/__main__.py
|
||||
cp __main__.bin wtsexporter_macos_x64
|
||||
shasum -a 256 wtsexporter_macos_x64
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: binary-macos
|
||||
path: |
|
||||
./wtsexporter_macos_x64
|
||||
37
.github/workflows/python-publish.yml
vendored
Normal file
37
.github/workflows/python-publish.yml
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
# This workflow will upload a Python Package using Twine when a release is created
|
||||
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
|
||||
|
||||
# This workflow uses actions that are not certified by GitHub.
|
||||
# They are provided by a third-party and are governed by
|
||||
# separate terms of service, privacy policy, and support
|
||||
# documentation.
|
||||
|
||||
name: Upload Python Package
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
id-token: write
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.x'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install build
|
||||
- name: Build package
|
||||
run: python -m build
|
||||
- name: Publish package
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
18
.gitignore
vendored
18
.gitignore
vendored
@@ -127,3 +127,21 @@ dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# Nuitka
|
||||
*.build/
|
||||
*.dist/
|
||||
*.onefile-build/
|
||||
*.exe
|
||||
__main__
|
||||
|
||||
|
||||
# Dev-time intermediates & temp files
|
||||
result/
|
||||
WhatsApp/
|
||||
/*.db
|
||||
/*.db-*
|
||||
/myout
|
||||
/msgstore.db
|
||||
/myout-json
|
||||
.vscode/
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2021 Knugi
|
||||
Copyright (c) 2021-2023 Knugi
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
36
LICENSE.django
Normal file
36
LICENSE.django
Normal file
@@ -0,0 +1,36 @@
|
||||
The Whatsapp Chat Exporter is licensed under the MIT license. For more information,
|
||||
refer to the file LICENSE.
|
||||
|
||||
Whatsapp Chat Exporter incorporates code from Django, governed by the three-clause
|
||||
BSD license—a permissive open-source license. The copyright and license details are
|
||||
provided below to adhere to Django's terms.
|
||||
|
||||
------
|
||||
|
||||
Copyright (c) Django Software Foundation and individual contributors.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of Django nor the names of its contributors may be used
|
||||
to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
162
README.md
162
README.md
@@ -3,16 +3,23 @@
|
||||

|
||||
[](https://pypi.org/project/Whatsapp-Chat-Exporter/)
|
||||
|
||||
A customizable Android and iPhone Whatsapp database parser that will give you the history of your Whatsapp conversations in HTML and JSON.
|
||||
**If you plan to uninstall WhatsApp or delete your WhatsApp account, please make a backup of your WhatsApp database. You may want to use this exporter again on the same database in the future as the exporter develops**
|
||||
A customizable Android and iPhone Whatsapp database parser that will give you the history of your Whatsapp conversations in HTML and JSON. Inspired by [Telegram Chat Export Tool](https://telegram.org/blog/export-and-more).
|
||||
> [!TIP]
|
||||
> If you plan to uninstall WhatsApp or delete your WhatsApp account, please make a backup of your WhatsApp database. You may want to use this exporter again on the same database in the future as the exporter develops.
|
||||
|
||||
If you would like to support this project, all you need to do is to contribute or share this project! If you think otherwise and want to make a donation, please refer to the [Donation Guide](https://blog.knugi.com/DONATE.html).
|
||||
|
||||
# Usage
|
||||
**If you want to use the old release (< 0.5) of the exporter, please follow the [old usage guide](https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/old_README.md#usage)**
|
||||
|
||||
> [!NOTE]
|
||||
> Usage in README may be removed in the future. Check the usage in [Wiki](https://github.com/KnugiHK/Whatsapp-Chat-Exporter/wiki)
|
||||
>
|
||||
> If you want to use the old release (< 0.5) of the exporter, please follow the [old usage guide](https://github.com/KnugiHK/Whatsapp-Chat-Exporter/wiki/Old-Usage#usage).
|
||||
|
||||
First, install the exporter by:
|
||||
```shell
|
||||
pip install whatsapp-chat-exporter
|
||||
pip install whatsapp-chat-exporter[android_backup] & :: Optional, if you want it to support decrypting Android WhatsApp backup.
|
||||
pip install whatsapp-chat-exporter[android_backup] :; # Optional, if you want it to support decrypting Android WhatsApp backup.
|
||||
```
|
||||
Then, create a working directory in somewhere you want
|
||||
```shell
|
||||
@@ -23,7 +30,7 @@ cd working_wts
|
||||
### Unencrypted WhatsApp database
|
||||
Extract the WhatsApp database with whatever means, one possible means is to use the [WhatsApp-Key-DB-Extractor](https://github.com/KnugiHK/WhatsApp-Key-DB-Extractor)
|
||||
|
||||
After you obtain your WhatsApp databse, copy the WhatsApp database and media folder to the working directory. The database is called msgstore.db. If you also want the name of your contacts, get the contact database, which is called wa.db. And copy the WhatsApp (Media) directory from your phone directly.
|
||||
After you obtain your WhatsApp database, copy the WhatsApp database and media folder to the working directory. The database is called msgstore.db. If you also want the name of your contacts, get the contact database, which is called wa.db. And copy the WhatsApp (Media) directory from your phone directly.
|
||||
|
||||
And now, you should have something like this in the working directory.
|
||||
|
||||
@@ -33,12 +40,27 @@ Simply invoke the following command from shell.
|
||||
```sh
|
||||
wtsexporter -a
|
||||
```
|
||||
#### Enriching Contact from vCard
|
||||
Usually, the default WhatsApp contact database extracted from your phone will contain the contact names, and the exporter will use it to map your chats. However, some reported cases showed that the database may never have been populated.
|
||||
In this case, you can export your contacts to a vCard file from your phone or a cloud provider like Google Contacts. Then, install the necessary dependency and run the following command from the shell:
|
||||
```sh
|
||||
pip install whatsapp-chat-exporter["vcards"]
|
||||
wtsexporter -a --enrich-from-vcard contacts.vcf --default-country-code 852
|
||||
```
|
||||
|
||||
### Encrypted Android WhatsApp Backup
|
||||
In order to support the decryption, install pycryptodome if it is not installed
|
||||
```sh
|
||||
pip install pycryptodome
|
||||
pip install pycryptodome # Or
|
||||
pip install whatsapp-chat-exporter["android_backup"] # install along with this software
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> Crypt15 is now the easiest way to decrypt a backup. If you have the 32-byte hex key generated when you enabled End-to-End encrypted backup, you can use it to decrypt the backup. If you do not have the 32-byte hex key, you can still decrypt the backup with the key file, which is extracted in the same way as the key file for Crypt12 and Crypt14.
|
||||
|
||||
#### Crypt12 or Crypt14
|
||||
You will need the decryption key file from your phone. If you have root access, you can find it as `/data/data/com.whatsapp/files/key`. Otherwise, if you used WhatsApp-Key-DB-Extractor before, it will appear in the WhatsApp backup directory as `WhatsApp/Databases/.nomedia`.
|
||||
|
||||
Place the decryption key file (key) and the encrypted WhatsApp Backup (msgstore.db.crypt14) in the working directory. If you also want the name of your contacts, get the contact database, which is called wa.db. And copy the WhatsApp (Media) directory from your phone directly.
|
||||
|
||||
And now, you should have something like this in the working directory.
|
||||
@@ -50,20 +72,51 @@ Simply invoke the following command from shell.
|
||||
wtsexporter -a -k key -b msgstore.db.crypt14
|
||||
```
|
||||
|
||||
## Working with iPhone
|
||||
Do an iPhone Backup with iTunes first.
|
||||
### Encrypted iPhone Backup
|
||||
**If you are working on unencrypted iPhone backup, skip this**
|
||||
#### Crypt15 (End-to-End Encrypted Backup)
|
||||
To support Crypt15 backup, install javaobj-py3 if it is not installed
|
||||
```sh
|
||||
pip install javaobj-py3 # Or
|
||||
pip install whatsapp-chat-exporter["crypt15"] # install along with this software
|
||||
```
|
||||
Place the encrypted WhatsApp Backup (msgstore.db.crypt15) in the working directory. If you also want the name of your contacts, get the contact database, which is called wa.db. And copy the WhatsApp (Media) directory from your phone directly.
|
||||
If you do not have the 32-byte hex key (64 hex digits), also place the decryption key file (encrypted_backup.key) extracted from Android in the working directory. If you have the 32-byte hex key, simply pass the key on the command line.
|
||||
|
||||
If you want to work on an encrypted iPhone Backup, you should install iphone_backup_decrypt from [KnugiHK/iphone_backup_decrypt](https://github.com/KnugiHK/iphone_backup_decrypt) before you run the extract_iphone_media.py.
|
||||
Now, you should have something like this in the working directory (if you do not have 32 bytes hex key).
|
||||
|
||||

|
||||
##### Extracting
|
||||
If you do not have 32 bytes hex key but have the key file available, simply invoke the following command from shell.
|
||||
```sh
|
||||
wtsexporter -a -k encrypted_backup.key -b msgstore.db.crypt15
|
||||
```
|
||||
If you have the 32 bytes hex key, simply put the hex key in the -k option and invoke the command from shell like this:
|
||||
```sh
|
||||
wtsexporter -a -k 432435053b5204b08e5c3823423399aa30ff061435ab89bc4e6713969cdaa5a8 -b msgstore.db.crypt15
|
||||
```
|
||||
|
||||
## Working with iOS/iPadOS (iPhone or iPad)
|
||||
Do an iPhone/iPad Backup with iTunes/Finder first.
|
||||
* iPhone backup on Mac: https://support.apple.com/HT211229
|
||||
* iPhone backup on Windows: https://support.apple.com/HT212156
|
||||
* iPad backup: https://support.apple.com/guide/ipad/ipad9a74df05xx/ipados
|
||||
### Encrypted iOS/iPadOS Backup
|
||||
> [!NOTE]
|
||||
> If you are working on unencrypted iOS/iPadOS backup, skip this.
|
||||
|
||||
If you want to work on an encrypted iOS/iPadOS Backup, you should install iphone_backup_decrypt from [KnugiHK/iphone_backup_decrypt](https://github.com/KnugiHK/iphone_backup_decrypt) before you run the extract_iphone_media.py.
|
||||
```sh
|
||||
pip install git+https://github.com/KnugiHK/iphone_backup_decrypt
|
||||
```
|
||||
### Extracting
|
||||
Simply invoke the following command from shell, remember to replace the username and device id correspondingly in the command.
|
||||
#### Windows
|
||||
```sh
|
||||
wtsexporter -i -b "C:\Users\[Username]\AppData\Roaming\Apple Computer\MobileSync\Backup\[device id]"
|
||||
```
|
||||
#### Mac
|
||||
```sh
|
||||
wtsexporter -i -b ~/Library/Application\ Support/MobileSync/Backup/[device id]
|
||||
```
|
||||
|
||||
## Results
|
||||
After extracting, you will get these:
|
||||
@@ -77,32 +130,83 @@ After extracting, you will get these:
|
||||
Invoking wtsexporter with the --help option will show you all available options.
|
||||
```sh
|
||||
> wtsexporter --help
|
||||
Usage: wtsexporter [options]
|
||||
usage: wtsexporter [-h] [-a] [-i] [-e EXPORTED] [-w WA] [-m MEDIA] [-b BACKUP] [-o OUTPUT] [-j [JSON]] [--avoid-encoding-json] [--pretty-print-json [PRETTY_PRINT_JSON]] [-d DB] [-k KEY] [-t TEMPLATE] [-s]
|
||||
[-c] [--offline OFFLINE] [--size [SIZE]] [--no-html] [--check-update] [--assume-first-as-me] [--no-avatar] [--import] [--business] [--wab WAB] [--time-offset {-12 to 14}] [--date DATE]
|
||||
[--date-format FORMAT] [--include [phone number ...]] [--exclude [phone number ...]] [--dont-filter-empty] [--per-chat] [--create-separated-media] [--decrypt-chunk-size DECRYPT_CHUNK_SIZE]
|
||||
[--enrich-from-vcards ENRICH_FROM_VCARDS] [--default-country-code DEFAULT_CONTRY_CODE] [--txt [TEXT_FORMAT]]
|
||||
|
||||
Options:
|
||||
--version show program's version number and exit
|
||||
A customizable Android and iOS/iPadOS WhatsApp database parser that will give you the history of your WhatsApp conversations in HTML and JSON. Android Backup Crypt12, Crypt14 and Crypt15 supported.
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
-a, --android Define the target as Android
|
||||
-i, --iphone Define the target as iPhone
|
||||
-w WA, --wa=WA Path to contact database
|
||||
-m MEDIA, --media=MEDIA
|
||||
Path to WhatsApp media folder
|
||||
-b BACKUP, --backup=BACKUP
|
||||
Path to Android (must be used together with -k)/iPhone
|
||||
WhatsApp backup
|
||||
-o OUTPUT, --output=OUTPUT
|
||||
Output to specific directory
|
||||
-j, --json Save the result to a single JSON file
|
||||
-d DB, --db=DB Path to database file
|
||||
-k KEY, --key=KEY Path to key file
|
||||
-t TEMPLATE, --template=TEMPLATE
|
||||
-i, --ios, --iphone Define the target as iPhone/iPad
|
||||
-e EXPORTED, --exported EXPORTED
|
||||
Define the target as exported chat file and specify the path to the file
|
||||
-w WA, --wa WA Path to contact database (default: wa.db/ContactsV2.sqlite)
|
||||
-m MEDIA, --media MEDIA
|
||||
Path to WhatsApp media folder (default: WhatsApp)
|
||||
-b BACKUP, --backup BACKUP
|
||||
Path to Android (must be used together with -k)/iOS WhatsApp backup
|
||||
-o OUTPUT, --output OUTPUT
|
||||
Output to specific directory (default: result)
|
||||
-j [JSON], --json [JSON]
|
||||
Save the result to a single JSON file (default if present: result.json)
|
||||
--avoid-encoding-json
|
||||
Don't encode non-ascii characters in the output JSON files
|
||||
--pretty-print-json [PRETTY_PRINT_JSON]
|
||||
Pretty print the output JSON.
|
||||
-d DB, --db DB Path to database file (default: msgstore.db/7c7fba66680ef796b916b067077cc246adacf01d)
|
||||
-k KEY, --key KEY Path to key file
|
||||
-t TEMPLATE, --template TEMPLATE
|
||||
Path to custom HTML template
|
||||
-s, --showkey Show the HEX key used to decrypt the database
|
||||
-c, --move-media Move the media directory to output directory if the flag is set, otherwise copy it
|
||||
--offline OFFLINE Relative path to offline static files
|
||||
--size [SIZE], --output-size [SIZE], --split [SIZE]
|
||||
Maximum (rough) size of a single output file in bytes, 0 for auto
|
||||
--no-html Do not output html files
|
||||
--check-update Check for updates (require Internet access)
|
||||
--assume-first-as-me Assume the first message in a chat as sent by me (must be used together with -e)
|
||||
--no-avatar Do not render avatar in HTML output
|
||||
--import Import JSON file and convert to HTML output
|
||||
--business Use Whatsapp Business default files (iOS only)
|
||||
--wab WAB, --wa-backup WAB
|
||||
Path to contact database in crypt15 format
|
||||
--time-offset {-12 to 14}
|
||||
Offset in hours (-12 to 14) for time displayed in the output
|
||||
--date DATE The date filter in specific format (inclusive)
|
||||
--date-format FORMAT The date format for the date filter
|
||||
--include [phone number ...]
|
||||
Include chats that match the supplied phone number
|
||||
--exclude [phone number ...]
|
||||
Exclude chats that match the supplied phone number
|
||||
--dont-filter-empty By default, the exporter will not render chats with no valid message. Setting this flag will cause the exporter to render those.
|
||||
--per-chat Output the JSON file per chat
|
||||
--create-separated-media
|
||||
Create a copy of the media seperated per chat in <MEDIA>/separated/ directory
|
||||
--decrypt-chunk-size DECRYPT_CHUNK_SIZE
|
||||
Specify the chunk size for decrypting iOS backup, which may affect the decryption speed.
|
||||
--enrich-from-vcards ENRICH_FROM_VCARDS
|
||||
Path to an exported vcf file from Google contacts export. Add names missing from WhatsApp's default database
|
||||
--default-country-code DEFAULT_CONTRY_CODE
|
||||
Use with --enrich-from-vcards. When numbers in the vcf file does not have a country code, this will be used. 1 is for US, 66 for Thailand etc. Most likely use the number of your own
|
||||
country
|
||||
--txt [TEXT_FORMAT] Export chats in text format similar to what WhatsApp officially provided (default if present: result/)
|
||||
|
||||
WhatsApp Chat Exporter: 0.10.5 Licensed with MIT. See https://wts.knugi.dev/docs?dest=osl for all open source licenses.
|
||||
```
|
||||
|
||||
# To do
|
||||
1. Reply in iPhone
|
||||
See [issues](https://github.com/KnugiHK/Whatsapp-Chat-Exporter/issues).
|
||||
|
||||
# Copyright
|
||||
This is an MIT-licensed project.
|
||||
|
||||
The Telegram Desktop's export is the reference for whatsapp.html in this repo
|
||||
The Telegram Desktop's export is the reference for whatsapp.html in this repo.
|
||||
|
||||
`bplist.py` was released by Vladimir "Farcaller" Pouzanov under MIT license.
|
||||
|
||||
Please also refer to any files prefixed with `LICENSE` to obtain copies of the various licenses.
|
||||
|
||||
WhatsApp Chat Exporter is not affiliated, associated, authorized, endorsed by, or in any way officially connected with the WhatsApp LLC, or any of its subsidiaries or its affiliates. The official WhatsApp LLC website can be found at https://www.whatsapp.com/.
|
||||
|
||||
@@ -1 +1,3 @@
|
||||
__version__ = "0.7.0"
|
||||
#!/usr/bin/python3
|
||||
|
||||
__version__ = "0.10.5"
|
||||
|
||||
@@ -1,163 +1,608 @@
|
||||
from .__init__ import __version__
|
||||
from Whatsapp_Chat_Exporter import extract, extract_iphone
|
||||
from Whatsapp_Chat_Exporter import extract_iphone_media
|
||||
from optparse import OptionParser
|
||||
#!/usr/bin/python3
|
||||
|
||||
import io
|
||||
import os
|
||||
import sqlite3
|
||||
import shutil
|
||||
import json
|
||||
import string
|
||||
import glob
|
||||
try:
|
||||
import vobject
|
||||
except ModuleNotFoundError:
|
||||
vcards_deps_installed = False
|
||||
else:
|
||||
from Whatsapp_Chat_Exporter.vcards_contacts import ContactsFromVCards
|
||||
vcards_deps_installed = True
|
||||
from Whatsapp_Chat_Exporter import exported_handler, android_handler
|
||||
from Whatsapp_Chat_Exporter import ios_handler, ios_media_handler
|
||||
from Whatsapp_Chat_Exporter.data_model import ChatStore
|
||||
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Crypt, DbType, chat_is_empty, readable_to_bytes
|
||||
from Whatsapp_Chat_Exporter.utility import check_update, import_from_json, sanitize_filename, bytes_to_readable
|
||||
from argparse import ArgumentParser, SUPPRESS
|
||||
from datetime import datetime
|
||||
from sys import exit
|
||||
try:
|
||||
from .__init__ import __version__
|
||||
except ImportError:
|
||||
from Whatsapp_Chat_Exporter.__init__ import __version__
|
||||
|
||||
|
||||
def main():
|
||||
parser = OptionParser(version=f"Whatsapp Chat Exporter: {__version__}")
|
||||
parser.add_option(
|
||||
parser = ArgumentParser(
|
||||
description = 'A customizable Android and iOS/iPadOS WhatsApp database parser that '
|
||||
'will give you the history of your WhatsApp conversations in HTML '
|
||||
'and JSON. Android Backup Crypt12, Crypt14 and Crypt15 supported.',
|
||||
epilog = f'WhatsApp Chat Exporter: {__version__} Licensed with MIT. See '
|
||||
'https://wts.knugi.dev/docs?dest=osl for all open source licenses.'
|
||||
)
|
||||
parser.add_argument(
|
||||
'-a',
|
||||
'--android',
|
||||
dest='android',
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Define the target as Android")
|
||||
parser.add_option(
|
||||
parser.add_argument(
|
||||
'-i',
|
||||
'--ios',
|
||||
'--iphone',
|
||||
dest='iphone',
|
||||
dest='ios',
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Define the target as iPhone")
|
||||
parser.add_option(
|
||||
help="Define the target as iPhone/iPad")
|
||||
parser.add_argument(
|
||||
"-e",
|
||||
"--exported",
|
||||
dest="exported",
|
||||
default=None,
|
||||
help="Define the target as exported chat file and specify the path to the file"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-w",
|
||||
"--wa",
|
||||
dest="wa",
|
||||
default=None,
|
||||
help="Path to contact database")
|
||||
parser.add_option(
|
||||
help="Path to contact database (default: wa.db/ContactsV2.sqlite)")
|
||||
parser.add_argument(
|
||||
"-m",
|
||||
"--media",
|
||||
dest="media",
|
||||
default=None,
|
||||
help="Path to WhatsApp media folder")
|
||||
parser.add_option(
|
||||
help="Path to WhatsApp media folder (default: WhatsApp)")
|
||||
parser.add_argument(
|
||||
"-b",
|
||||
"--backup",
|
||||
dest="backup",
|
||||
default=None,
|
||||
help="Path to Android (must be used together "
|
||||
"with -k)/iPhone WhatsApp backup")
|
||||
parser.add_option(
|
||||
"with -k)/iOS WhatsApp backup")
|
||||
parser.add_argument(
|
||||
"-o",
|
||||
"--output",
|
||||
dest="output",
|
||||
default="result",
|
||||
help="Output to specific directory")
|
||||
parser.add_option(
|
||||
help="Output to specific directory (default: result)")
|
||||
parser.add_argument(
|
||||
'-j',
|
||||
'--json',
|
||||
dest='json',
|
||||
nargs='?',
|
||||
default=None,
|
||||
type=str,
|
||||
const="result.json",
|
||||
help="Save the result to a single JSON file (default if present: result.json)")
|
||||
parser.add_argument(
|
||||
'--avoid-encoding-json',
|
||||
dest='avoid_encoding_json',
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Save the result to a single JSON file")
|
||||
parser.add_option(
|
||||
help="Don't encode non-ascii characters in the output JSON files")
|
||||
parser.add_argument(
|
||||
'--pretty-print-json',
|
||||
dest='pretty_print_json',
|
||||
default=None,
|
||||
nargs='?',
|
||||
const=2,
|
||||
type=int,
|
||||
help="Pretty print the output JSON.")
|
||||
parser.add_argument(
|
||||
'-d',
|
||||
'--db',
|
||||
dest='db',
|
||||
default=None,
|
||||
help="Path to database file")
|
||||
parser.add_option(
|
||||
help="Path to database file (default: msgstore.db/"
|
||||
"7c7fba66680ef796b916b067077cc246adacf01d)")
|
||||
parser.add_argument(
|
||||
'-k',
|
||||
'--key',
|
||||
dest='key',
|
||||
default=None,
|
||||
help="Path to key file"
|
||||
)
|
||||
parser.add_option(
|
||||
parser.add_argument(
|
||||
"-t",
|
||||
"--template",
|
||||
dest="template",
|
||||
default=None,
|
||||
help="Path to custom HTML template")
|
||||
(options, args) = parser.parse_args()
|
||||
help="Path to custom HTML template"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--embedded",
|
||||
dest="embedded",
|
||||
default=False,
|
||||
action='store_true',
|
||||
help=SUPPRESS or "Embed media into HTML file (not yet implemented)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-s",
|
||||
"--showkey",
|
||||
dest="showkey",
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Show the HEX key used to decrypt the database"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-c",
|
||||
"--move-media",
|
||||
dest="move_media",
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Move the media directory to output directory if the flag is set, otherwise copy it"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--offline",
|
||||
dest="offline",
|
||||
default=None,
|
||||
help="Relative path to offline static files"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--size",
|
||||
"--output-size",
|
||||
"--split",
|
||||
dest="size",
|
||||
nargs='?',
|
||||
const=0,
|
||||
default=None,
|
||||
help="Maximum (rough) size of a single output file in bytes, 0 for auto"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-html",
|
||||
dest="no_html",
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Do not output html files"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--check-update",
|
||||
dest="check_update",
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Check for updates (require Internet access)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--assume-first-as-me",
|
||||
dest="assume_first_as_me",
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Assume the first message in a chat as sent by me (must be used together with -e)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-avatar",
|
||||
dest="no_avatar",
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Do not render avatar in HTML output"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--import",
|
||||
dest="import_json",
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Import JSON file and convert to HTML output"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--business",
|
||||
dest="business",
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Use Whatsapp Business default files (iOS only)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--wab",
|
||||
"--wa-backup",
|
||||
dest="wab",
|
||||
default=None,
|
||||
help="Path to contact database in crypt15 format"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--time-offset",
|
||||
dest="timezone_offset",
|
||||
default=0,
|
||||
type=int,
|
||||
choices=range(-12, 15),
|
||||
metavar="{-12 to 14}",
|
||||
help="Offset in hours (-12 to 14) for time displayed in the output"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--date",
|
||||
dest="filter_date",
|
||||
default=None,
|
||||
metavar="DATE",
|
||||
help="The date filter in specific format (inclusive)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--date-format",
|
||||
dest="filter_date_format",
|
||||
default="%Y-%m-%d %H:%M",
|
||||
metavar="FORMAT",
|
||||
help="The date format for the date filter"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--include",
|
||||
dest="filter_chat_include",
|
||||
nargs='*',
|
||||
metavar="phone number",
|
||||
help="Include chats that match the supplied phone number"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--exclude",
|
||||
dest="filter_chat_exclude",
|
||||
nargs='*',
|
||||
metavar="phone number",
|
||||
help="Exclude chats that match the supplied phone number"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dont-filter-empty",
|
||||
dest="filter_empty",
|
||||
default=True,
|
||||
action='store_false',
|
||||
help="By default, the exporter will not render chats with no valid message. Setting this flag will cause the exporter to render those."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--per-chat",
|
||||
dest="json_per_chat",
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Output the JSON file per chat"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--create-separated-media",
|
||||
dest="separate_media",
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Create a copy of the media seperated per chat in <MEDIA>/separated/ directory"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--decrypt-chunk-size",
|
||||
dest="decrypt_chunk_size",
|
||||
default=1 * 1024 * 1024,
|
||||
type=int,
|
||||
help="Specify the chunk size for decrypting iOS backup, which may affect the decryption speed."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enrich-from-vcards",
|
||||
dest="enrich_from_vcards",
|
||||
default=None,
|
||||
help="Path to an exported vcf file from Google contacts export. Add names missing from WhatsApp's default database"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--default-country-code",
|
||||
dest="default_contry_code",
|
||||
default=None,
|
||||
help="Use with --enrich-from-vcards. When numbers in the vcf file does not have a country code, this will be used. 1 is for US, 66 for Thailand etc. Most likely use the number of your own country"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--txt",
|
||||
dest="text_format",
|
||||
nargs='?',
|
||||
default=None,
|
||||
type=str,
|
||||
const="result",
|
||||
help="Export chats in text format similar to what WhatsApp officially provided (default if present: result/)")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Check for updates
|
||||
if args.check_update:
|
||||
exit(check_update())
|
||||
|
||||
# Sanity checks
|
||||
if args.android and args.ios and args.exported and args.import_json:
|
||||
parser.error("You must define only one device type.")
|
||||
if not args.android and not args.ios and not args.exported and not args.import_json:
|
||||
parser.error("You must define the device type.")
|
||||
if args.no_html and not args.json and not args.text_format:
|
||||
parser.error("You must either specify a JSON output file, text file output directory or enable HTML output.")
|
||||
if args.import_json and (args.android or args.ios or args.exported or args.no_html):
|
||||
parser.error("You can only use --import with -j and without --no-html, -a, -i, -e.")
|
||||
elif args.import_json and not os.path.isfile(args.json):
|
||||
parser.error("JSON file not found.")
|
||||
if args.android and args.business:
|
||||
parser.error("WhatsApp Business is only available on iOS for now.")
|
||||
if args.json_per_chat and (
|
||||
(args.json[-5:] != ".json" and os.path.isfile(args.json)) or \
|
||||
(args.json[-5:] == ".json" and os.path.isfile(args.json[:-5]))
|
||||
):
|
||||
parser.error("When --per-chat is enabled, the destination of --json must be a directory.")
|
||||
if args.enrich_from_vcards is not None and args.default_contry_code is None:
|
||||
parser.error("When --enrich-from-vcards is provided, you must also set --default-country-code")
|
||||
if args.size is not None and not isinstance(args.size, int) and not args.size.isnumeric():
|
||||
try:
|
||||
args.size = readable_to_bytes(args.size)
|
||||
except ValueError:
|
||||
parser.error("The value for --split must be ended in pure bytes or with a proper unit (e.g., 1048576 or 1MB)")
|
||||
if args.filter_date is not None:
|
||||
if " - " in args.filter_date:
|
||||
start, end = args.filter_date.split(" - ")
|
||||
start = int(datetime.strptime(start, args.filter_date_format).timestamp())
|
||||
end = int(datetime.strptime(end, args.filter_date_format).timestamp())
|
||||
if start < 1009843200 or end < 1009843200:
|
||||
parser.error("WhatsApp was first released in 2009...")
|
||||
if start > end:
|
||||
parser.error("The start date cannot be a moment after the end date.")
|
||||
if args.android:
|
||||
args.filter_date = f"BETWEEN {start}000 AND {end}000"
|
||||
elif args.ios:
|
||||
args.filter_date = f"BETWEEN {start - APPLE_TIME} AND {end - APPLE_TIME}"
|
||||
else:
|
||||
_timestamp = int(datetime.strptime(args.filter_date[2:], args.filter_date_format).timestamp())
|
||||
if _timestamp < 1009843200:
|
||||
parser.error("WhatsApp was first released in 2009...")
|
||||
if args.filter_date[:2] == "> ":
|
||||
if args.android:
|
||||
args.filter_date = f">= {_timestamp}000"
|
||||
elif args.ios:
|
||||
args.filter_date = f">= {_timestamp - APPLE_TIME}"
|
||||
elif args.filter_date[:2] == "< ":
|
||||
if args.android:
|
||||
args.filter_date = f"<= {_timestamp}000"
|
||||
elif args.ios:
|
||||
args.filter_date = f"<= {_timestamp - APPLE_TIME}"
|
||||
else:
|
||||
parser.error("Unsupported date format. See https://wts.knugi.dev/docs?dest=date")
|
||||
if args.filter_chat_include is not None and args.filter_chat_exclude is not None:
|
||||
parser.error("Chat inclusion and exclusion filters cannot be used together.")
|
||||
if args.filter_chat_include is not None:
|
||||
for chat in args.filter_chat_include:
|
||||
if not chat.isnumeric():
|
||||
parser.error("Enter a phone number in the chat filter. See https://wts.knugi.dev/docs?dest=chat")
|
||||
if args.filter_chat_exclude is not None:
|
||||
for chat in args.filter_chat_exclude:
|
||||
if not chat.isnumeric():
|
||||
parser.error("Enter a phone number in the chat filter. See https://wts.knugi.dev/docs?dest=chat")
|
||||
filter_chat = (args.filter_chat_include, args.filter_chat_exclude)
|
||||
|
||||
if options.android and options.iphone:
|
||||
print("You must define only one device type.")
|
||||
exit()
|
||||
if not options.android and not options.iphone:
|
||||
print("You must define the device type.")
|
||||
exit()
|
||||
data = {}
|
||||
|
||||
if options.android:
|
||||
contacts = extract.contacts
|
||||
messages = extract.messages
|
||||
media = extract.media
|
||||
vcard = extract.vcard
|
||||
create_html = extract.create_html
|
||||
if options.db is None:
|
||||
if args.enrich_from_vcards is not None:
|
||||
if not vcards_deps_installed:
|
||||
parser.error(
|
||||
"You don't have the dependency to enrich contacts with vCard.\n"
|
||||
"Read more on how to deal with enriching contacts:\n"
|
||||
"https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage"
|
||||
)
|
||||
contact_store = ContactsFromVCards()
|
||||
contact_store.load_vcf_file(args.enrich_from_vcards, args.default_contry_code)
|
||||
|
||||
if args.android:
|
||||
contacts = android_handler.contacts
|
||||
messages = android_handler.messages
|
||||
media = android_handler.media
|
||||
vcard = android_handler.vcard
|
||||
create_html = android_handler.create_html
|
||||
if args.db is None:
|
||||
msg_db = "msgstore.db"
|
||||
else:
|
||||
msg_db = options.db
|
||||
if options.key is not None:
|
||||
if options.backup is None:
|
||||
print("You must specify the backup file with -b")
|
||||
return False
|
||||
print("Decryption key specified, decrypting WhatsApp backup...")
|
||||
key = open(options.key, "rb").read()
|
||||
db = open(options.backup, "rb").read()
|
||||
is_crypt14 = False if "crypt12" in options.backup else True
|
||||
if not extract.decrypt_backup(db, key, msg_db, is_crypt14):
|
||||
print("Dependencies of decrypt_backup are not "
|
||||
"present. For details, see README.md")
|
||||
return False
|
||||
if options.wa is None:
|
||||
msg_db = args.db
|
||||
if args.wa is None:
|
||||
contact_db = "wa.db"
|
||||
else:
|
||||
contact_db = options.wa
|
||||
if options.media is None:
|
||||
options.media = "WhatsApp"
|
||||
|
||||
if len(args) == 1:
|
||||
msg_db = args[0]
|
||||
contact_db = args.wa
|
||||
if args.key is not None:
|
||||
if args.backup is None:
|
||||
print("You must specify the backup file with -b")
|
||||
exit(1)
|
||||
print("Decryption key specified, decrypting WhatsApp backup...")
|
||||
if "crypt12" in args.backup:
|
||||
crypt = Crypt.CRYPT12
|
||||
elif "crypt14" in args.backup:
|
||||
crypt = Crypt.CRYPT14
|
||||
elif "crypt15" in args.backup:
|
||||
crypt = Crypt.CRYPT15
|
||||
if os.path.isfile(args.key):
|
||||
key = open(args.key, "rb")
|
||||
elif all(char in string.hexdigits for char in args.key):
|
||||
key = bytes.fromhex(args.key)
|
||||
db = open(args.backup, "rb").read()
|
||||
if args.wab:
|
||||
wab = open(args.wab, "rb").read()
|
||||
error_wa = android_handler.decrypt_backup(wab, key, contact_db, crypt, args.showkey, DbType.CONTACT)
|
||||
if isinstance(key, io.IOBase):
|
||||
key.seek(0)
|
||||
else:
|
||||
error_wa = 0
|
||||
error_message = android_handler.decrypt_backup(db, key, msg_db, crypt, args.showkey, DbType.MESSAGE)
|
||||
if error_wa != 0:
|
||||
error = error_wa
|
||||
elif error_message != 0:
|
||||
error = error_message
|
||||
else:
|
||||
error = 0
|
||||
if error != 0:
|
||||
if error == 1:
|
||||
print("Dependencies of decrypt_backup and/or extract_encrypted_key"
|
||||
" are not present. For details, see README.md.")
|
||||
exit(3)
|
||||
elif error == 2:
|
||||
print("Failed when decompressing the decrypted backup. "
|
||||
"Possibly incorrect offsets used in decryption.")
|
||||
exit(4)
|
||||
else:
|
||||
print("Unknown error occurred.", error)
|
||||
exit(5)
|
||||
if args.media is None:
|
||||
args.media = "WhatsApp"
|
||||
|
||||
if os.path.isfile(contact_db):
|
||||
with sqlite3.connect(contact_db) as db:
|
||||
db.row_factory = sqlite3.Row
|
||||
contacts(db, data)
|
||||
|
||||
elif options.iphone:
|
||||
messages = extract_iphone.messages
|
||||
media = extract_iphone.media
|
||||
vcard = extract_iphone.vcard
|
||||
create_html = extract_iphone.create_html
|
||||
if options.backup is not None:
|
||||
extract_iphone_media.extract_media(options.backup)
|
||||
if options.db is None:
|
||||
msg_db = "7c7fba66680ef796b916b067077cc246adacf01d"
|
||||
elif args.ios:
|
||||
import sys
|
||||
if "--iphone" in sys.argv:
|
||||
print(
|
||||
"WARNING: The --iphone flag is deprecated and will"
|
||||
"be removed in the future. Use --ios instead."
|
||||
)
|
||||
contacts = ios_handler.contacts
|
||||
messages = ios_handler.messages
|
||||
media = ios_handler.media
|
||||
vcard = ios_handler.vcard
|
||||
create_html = android_handler.create_html
|
||||
if args.business:
|
||||
from Whatsapp_Chat_Exporter.utility import WhatsAppBusinessIdentifier as identifiers
|
||||
else:
|
||||
msg_db = options.db
|
||||
if options.wa is None:
|
||||
from Whatsapp_Chat_Exporter.utility import WhatsAppIdentifier as identifiers
|
||||
if args.media is None:
|
||||
args.media = identifiers.DOMAIN
|
||||
if args.backup is not None:
|
||||
if not os.path.isdir(args.media):
|
||||
ios_media_handler.extract_media(args.backup, identifiers, args.decrypt_chunk_size)
|
||||
else:
|
||||
print("WhatsApp directory already exists, skipping WhatsApp file extraction.")
|
||||
if args.db is None:
|
||||
msg_db = identifiers.MESSAGE
|
||||
else:
|
||||
msg_db = args.db
|
||||
if args.wa is None:
|
||||
contact_db = "ContactsV2.sqlite"
|
||||
else:
|
||||
contact_db = options.wa
|
||||
if options.media is None:
|
||||
options.media = "Message"
|
||||
contact_db = args.wa
|
||||
if os.path.isfile(contact_db):
|
||||
with sqlite3.connect(contact_db) as db:
|
||||
db.row_factory = sqlite3.Row
|
||||
contacts(db, data)
|
||||
|
||||
if len(args) == 1:
|
||||
msg_db = args[0]
|
||||
if not args.exported and not args.import_json:
|
||||
if os.path.isfile(msg_db):
|
||||
with sqlite3.connect(msg_db) as db:
|
||||
db.row_factory = sqlite3.Row
|
||||
messages(db, data, args.media, args.timezone_offset, args.filter_date, filter_chat)
|
||||
media(db, data, args.media, args.filter_date, filter_chat, args.separate_media)
|
||||
vcard(db, data, args.media, args.filter_date, filter_chat)
|
||||
if args.android:
|
||||
android_handler.calls(db, data, args.timezone_offset, filter_chat)
|
||||
if not args.no_html:
|
||||
if args.enrich_from_vcards is not None and not contact_store.is_empty():
|
||||
contact_store.enrich_from_vcards(data)
|
||||
|
||||
if os.path.isfile(msg_db):
|
||||
with sqlite3.connect(msg_db) as db:
|
||||
messages(db, data)
|
||||
media(db, data, options.media)
|
||||
vcard(db, data)
|
||||
create_html(data, options.output, options.template)
|
||||
create_html(
|
||||
data,
|
||||
args.output,
|
||||
args.template,
|
||||
args.embedded,
|
||||
args.offline,
|
||||
args.size,
|
||||
args.no_avatar,
|
||||
args.filter_empty
|
||||
)
|
||||
else:
|
||||
print(
|
||||
"The message database does not exist. You may specify the path "
|
||||
"to database file with option -d or check your provided path."
|
||||
)
|
||||
exit(6)
|
||||
|
||||
if not os.path.isdir(f"{options.output}/{options.media}"):
|
||||
shutil.move(options.media, f"{options.output}/")
|
||||
if os.path.isdir(args.media):
|
||||
media_path = os.path.join(args.output, args.media)
|
||||
if os.path.isdir(media_path):
|
||||
print("\nWhatsApp directory already exists in output directory. Skipping...", end="\n")
|
||||
else:
|
||||
if not args.move_media:
|
||||
if os.path.isdir(media_path):
|
||||
print("\nWhatsApp directory already exists in output directory. Skipping...", end="\n")
|
||||
else:
|
||||
print("\nCopying media directory...", end="\n")
|
||||
shutil.copytree(args.media, media_path)
|
||||
else:
|
||||
try:
|
||||
shutil.move(args.media, f"{args.output}/")
|
||||
except PermissionError:
|
||||
print("\nCannot remove original WhatsApp directory. "
|
||||
"Perhaps the directory is opened?", end="\n")
|
||||
elif args.exported:
|
||||
exported_handler.messages(args.exported, data, args.assume_first_as_me)
|
||||
if not args.no_html:
|
||||
android_handler.create_html(
|
||||
data,
|
||||
args.output,
|
||||
args.template,
|
||||
args.embedded,
|
||||
args.offline,
|
||||
args.size,
|
||||
args.no_avatar,
|
||||
args.filter_empty
|
||||
)
|
||||
for file in glob.glob(r'*.*'):
|
||||
shutil.copy(file, args.output)
|
||||
elif args.import_json:
|
||||
import_from_json(args.json, data)
|
||||
android_handler.create_html(
|
||||
data,
|
||||
args.output,
|
||||
args.template,
|
||||
args.embedded,
|
||||
args.offline,
|
||||
args.size,
|
||||
args.no_avatar,
|
||||
args.filter_empty
|
||||
)
|
||||
|
||||
if options.json:
|
||||
with open("result.json", "w") as f:
|
||||
data = json.dumps(data)
|
||||
print(f"\nWriting JSON file...({int(len(data)/1024/1024)}MB)")
|
||||
f.write(data)
|
||||
if args.text_format:
|
||||
print("Writing text file...")
|
||||
android_handler.create_txt(data, args.text_format)
|
||||
|
||||
if args.json and not args.import_json:
|
||||
if args.filter_empty:
|
||||
data = {k: v for k, v in data.items() if not chat_is_empty(v)}
|
||||
|
||||
if args.enrich_from_vcards is not None and not contact_store.is_empty():
|
||||
contact_store.enrich_from_vcards(data)
|
||||
|
||||
if isinstance(data[next(iter(data))], ChatStore):
|
||||
data = {jik: chat.to_json() for jik, chat in data.items()}
|
||||
|
||||
if not args.json_per_chat:
|
||||
with open(args.json, "w") as f:
|
||||
data = json.dumps(
|
||||
data,
|
||||
ensure_ascii=not args.avoid_encoding_json,
|
||||
indent=args.pretty_print_json
|
||||
)
|
||||
print(f"\nWriting JSON file...({bytes_to_readable(len(data))})")
|
||||
f.write(data)
|
||||
else:
|
||||
if args.json[-5:] == ".json":
|
||||
args.json = args.json[:-5]
|
||||
total = len(data.keys())
|
||||
if not os.path.isdir(args.json):
|
||||
os.mkdir(args.json)
|
||||
for index, jik in enumerate(data.keys()):
|
||||
if data[jik]["name"] is not None:
|
||||
contact = data[jik]["name"].replace('/', '')
|
||||
else:
|
||||
contact = jik.replace('+', '')
|
||||
with open(f"{args.json}/{sanitize_filename(contact)}.json", "w") as f:
|
||||
file_content_to_write = json.dumps({jik: data[jik]}, ensure_ascii=not args.avoid_encoding_json, indent=2 if args.pretty_print_json else None)
|
||||
f.write(file_content_to_write)
|
||||
print(f"Writing JSON file...({index + 1}/{total})", end="\r")
|
||||
print()
|
||||
else:
|
||||
print()
|
||||
|
||||
print("Everything is done!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
875
Whatsapp_Chat_Exporter/android_handler.py
Normal file
875
Whatsapp_Chat_Exporter/android_handler.py
Normal file
@@ -0,0 +1,875 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import sqlite3
|
||||
import os
|
||||
import io
|
||||
import hmac
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from mimetypes import MimeTypes
|
||||
from markupsafe import escape as htmle
|
||||
from hashlib import sha256
|
||||
from base64 import b64decode, b64encode
|
||||
from datetime import datetime
|
||||
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
|
||||
from Whatsapp_Chat_Exporter.utility import MAX_SIZE, ROW_SIZE, DbType, convert_time_unit, determine_metadata
|
||||
from Whatsapp_Chat_Exporter.utility import rendering, Crypt, Device, get_file_name, setup_template, JidType
|
||||
from Whatsapp_Chat_Exporter.utility import brute_force_offset, CRYPT14_OFFSETS, get_status_location
|
||||
from Whatsapp_Chat_Exporter.utility import get_chat_condition, slugify, bytes_to_readable, chat_is_empty
|
||||
|
||||
try:
|
||||
import zlib
|
||||
from Crypto.Cipher import AES
|
||||
except ModuleNotFoundError:
|
||||
support_backup = False
|
||||
else:
|
||||
support_backup = True
|
||||
try:
|
||||
import javaobj
|
||||
except ModuleNotFoundError:
|
||||
support_crypt15 = False
|
||||
else:
|
||||
support_crypt15 = True
|
||||
|
||||
|
||||
def _generate_hmac_of_hmac(key_stream):
    """Derive a key from ``key_stream`` with two chained HMAC-SHA256 passes.

    The first pass is HMAC-SHA256 over ``key_stream`` keyed with 32 zero
    bytes. Its digest keys a second HMAC-SHA256 over the fixed label
    ``backup encryption`` followed by a single 0x01 byte.

    Args:
        key_stream: Raw key bytes.

    Returns:
        A ``(derived_key, key_stream)`` tuple: the 32-byte digest of the
        second HMAC and the input, returned unchanged.
    """
    zero_key = b'\x00' * 32
    intermediate = hmac.new(zero_key, key_stream, sha256).digest()
    derived = hmac.new(intermediate, b"backup encryption\x01", sha256)
    return derived.digest(), key_stream
|
||||
|
||||
|
||||
def _extract_encrypted_key(keyfile):
    """Derive the backup key from a Java-serialized key file.

    ``keyfile`` is deserialized with ``javaobj.loads``. Each element of the
    result is packed as one signed byte, and the concatenation is passed to
    ``_generate_hmac_of_hmac``.

    Args:
        keyfile: Serialized key file contents, as accepted by
            ``javaobj.loads``.

    Returns:
        The ``(derived_key, key_stream)`` tuple produced by
        ``_generate_hmac_of_hmac``.
    """
    # join() builds the stream in one pass instead of repeated bytes +=.
    key_stream = b"".join(
        byte.to_bytes(1, "big", signed=True)
        for byte in javaobj.loads(keyfile)
    )
    return _generate_hmac_of_hmac(key_stream)
|
||||
|
||||
|
||||
def decrypt_backup(database, key, output, crypt=Crypt.CRYPT14, show_crypt15=False, db_type=DbType.MESSAGE):
    """Decrypt an Android WhatsApp backup and write the SQLite file to disk.

    The payload is decrypted with AES in GCM mode and inflated with zlib.
    No GCM tag verification is performed here; a successful inflate plus the
    ``SQLITE`` header check is what validates the result.

    Args:
        database: Raw bytes of the encrypted backup.
        key: Key material, either ``bytes`` or a readable binary file object
            (read in full). For crypt12/crypt14 it must be 158 bytes long.
            For crypt15 a 32-byte value is used directly as the key stream;
            any other length is handed to ``_extract_encrypted_key``.
        output: Path the decrypted database is written to.
        crypt: Backup format (``Crypt.CRYPT12``, ``CRYPT14`` or ``CRYPT15``).
        show_crypt15: When true and ``crypt`` is crypt15, print the key
            stream as space-separated groups of four hex digits.
        db_type: ``DbType.MESSAGE`` or ``DbType.CONTACT``. Only consulted for
            crypt15, where the IV and payload offsets differ between them.

    Returns:
        0 on success; 1 if a required dependency is missing; 2 if a crypt14
        payload could not be inflated with any known offset nor by brute
        force; 3 if a crypt12/crypt15 payload fails to inflate.

    Raises:
        ValueError: If the key or backup is too short, the key and backup
            signatures differ, ``crypt`` or ``db_type`` is unsupported, or
            the decrypted data is not a SQLite database.
    """
    if not support_backup:
        return 1
    if isinstance(key, io.IOBase):
        key = key.read()

    t1 = t2 = None
    if crypt is not Crypt.CRYPT15:
        if len(key) != 158:
            raise ValueError("The key file must be 158 bytes")
        # Taken for bytes keys as well as file objects, so the signature
        # comparison below never reads an unbound name.
        t1 = key[30:62]

    # Determine the IV and database offsets
    if crypt == Crypt.CRYPT14:
        if len(database) < 191:
            raise ValueError("The crypt14 file must be at least 191 bytes")
        current_try = 0
        offsets = CRYPT14_OFFSETS[current_try]
        t2 = database[15:47]
        iv = database[offsets["iv"]:offsets["iv"] + 16]
        db_ciphertext = database[offsets["db"]:]
    elif crypt == Crypt.CRYPT12:
        if len(database) < 67:
            raise ValueError("The crypt12 file must be at least 67 bytes")
        t2 = database[3:35]
        iv = database[51:67]
        db_ciphertext = database[67:-20]
    elif crypt == Crypt.CRYPT15:
        if not support_crypt15:
            return 1
        if len(database) < 131:
            raise ValueError("The crypt15 file must be at least 131 bytes")
        if db_type == DbType.MESSAGE:
            iv = database[8:24]
            db_offset = database[0] + 2  # Skip protobuf + protobuf size and backup type
        elif db_type == DbType.CONTACT:
            iv = database[7:23]
            db_offset = database[0] + 1  # Skip protobuf + protobuf size
        else:
            raise ValueError(f"Unsupported database type: {db_type!r}")
        db_ciphertext = database[db_offset:]
    else:
        raise ValueError(f"Unsupported backup type: {crypt!r}")

    if t1 != t2:
        raise ValueError("The signature of key file and backup file mismatch")

    if crypt == Crypt.CRYPT15:
        if len(key) == 32:
            main_key, hex_key = _generate_hmac_of_hmac(key)
        else:
            main_key, hex_key = _extract_encrypted_key(key)
        if show_crypt15:
            key_hex = hex_key.hex()
            hex_key = [key_hex[c:c+4] for c in range(0, len(key_hex), 4)]
            print("The HEX key of the crypt15 backup is: " + ' '.join(hex_key))
    else:
        # With the enforced 158-byte key this is its final 32 bytes.
        main_key = key[126:]

    decompressed = False
    while not decompressed:
        cipher = AES.new(main_key, AES.MODE_GCM, iv)
        db_compressed = cipher.decrypt(db_ciphertext)
        try:
            db = zlib.decompress(db_compressed)
        except zlib.error:
            if crypt == Crypt.CRYPT14:
                # Walk the table of known crypt14 layouts before giving up.
                current_try += 1
                if current_try < len(CRYPT14_OFFSETS):
                    offsets = CRYPT14_OFFSETS[current_try]
                    iv = database[offsets["iv"]:offsets["iv"] + 16]
                    db_ciphertext = database[offsets["db"]:]
                    continue
                else:
                    print("Common offsets are not applicable to "
                          "your backup. Trying to brute force it...")
                    for start_iv, end_iv, start_db in brute_force_offset():
                        iv = database[start_iv:end_iv]
                        db_ciphertext = database[start_db:]
                        cipher = AES.new(main_key, AES.MODE_GCM, iv)
                        db_compressed = cipher.decrypt(db_ciphertext)
                        try:
                            db = zlib.decompress(db_compressed)
                        except zlib.error:
                            continue
                        else:
                            decompressed = True
                            print(
                                f"The offsets of your IV and database are {start_iv} and "
                                f"{start_db}, respectively. To include your offsets in the "
                                "program, please report it by creating an issue on GitHub: "
                                "https://github.com/KnugiHK/Whatsapp-Chat-Exporter/discussions/47"
                            )
                            break
                    if not decompressed:
                        return 2
            else:
                return 3
        else:
            decompressed = True

    if db[0:6].upper() != b"SQLITE":
        raise ValueError("The plaintext is not a SQLite database. Did you use the key to encrypt something...")
    with open(output, "wb") as f:
        f.write(db)
    return 0
|
||||
|
||||
|
||||
def contacts(db, data):
    """Load contacts from the ``wa_contacts`` table into ``data``.

    One ``ChatStore`` is created per row, keyed by the row's ``jid`` and
    named with ``display_name``, falling back to ``wa_name`` when that is
    NULL. A non-NULL ``status`` is copied onto the store.

    Args:
        db: Open SQLite connection to the contact database. Rows are read by
            column name, so the connection needs a name-indexable row
            factory such as ``sqlite3.Row``.
        data: Mapping of jid to ``ChatStore``; updated in place.

    Returns:
        ``False`` when the table has no rows; otherwise ``None``.
    """
    # Get contacts
    c = db.cursor()
    c.execute("""SELECT count() FROM wa_contacts""")
    total_row_number = c.fetchone()[0]
    if total_row_number == 0:
        print("No contacts profiles found in the default database, consider using --enrich-from-vcards for adopting names from exported contacts from Google")
        return False
    print(f"Processing contacts...({total_row_number})")

    c.execute("""SELECT jid, COALESCE(display_name, wa_name) as display_name, status FROM wa_contacts; """)
    for row in c:
        data[row["jid"]] = ChatStore(Device.ANDROID, row["display_name"])
        if row["status"] is not None:
            data[row["jid"]].status = row["status"]
|
||||
|
||||
|
||||
def _fetch_row_retrying(cursor):
    """Return the cursor's next row, retrying while SQLite raises OperationalError."""
    while True:
        try:
            return cursor.fetchone()
        except sqlite3.OperationalError:
            continue


def _lookup_sender(data, jid):
    """Return ``(name, fallback)`` for *jid*; either element may be ``None``."""
    if jid is None:
        return None, None
    name = data[jid].name if jid in data else None
    fallback = jid.split('@')[0] if "@" in jid else None
    return name, fallback


def _message_text(content, table_message, message):
    """Return the display text of a regular message, flagging *message* as meta when needed."""
    # Deleted messages carry status 5 when sent by me and status 0 otherwise.
    deleted_status = 5 if content["key_from_me"] == 1 else 0
    deleted = (
        (content["status"] == deleted_status and content["edit_version"] == 7)
        or (table_message and content["media_wa_type"] == 15)
    )
    if deleted:
        message.meta = True
        return "Message deleted"
    if content["media_wa_type"] == 5:
        message.meta = True
        return f"Location shared: {content['latitude'], content['longitude']}"
    msg = content["data"]
    if msg is not None:
        msg = msg.replace("\r\n", " <br>").replace("\n", " <br>")
    return msg


def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat):
    """Read the message history from *db* and add it to the chats in *data*.

    The legacy ``messages`` table is queried first; when SQLite raises
    ``OperationalError`` the newer ``message`` schema is used instead.

    Args:
        db: Database connection; rows are accessed by column name.
        data: Mapping of jid -> ``ChatStore``; missing chats are created.
        media_folder: Unused here; kept for interface compatibility.
        timezone_offset: Forwarded to every ``Message``.
        filter_date: SQL comparison fragment appended after ``timestamp``,
            or ``None`` for no date filter.
        filter_chat: Two-element sequence forwarded to
            ``get_chat_condition`` (index 0 with ``True``, index 1 with
            ``False``) -- presumably include / exclude lists.
    """
    c = db.cursor()
    try:
        c.execute(f"""SELECT count()
                      FROM messages
                        INNER JOIN jid
                            ON messages.key_remote_jid = jid.raw_string
                      WHERE 1=1
                        {f'AND timestamp {filter_date}' if filter_date is not None else ''}
                        {get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")}
                        {get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid", "android")}""")
    except sqlite3.OperationalError:
        c.execute(f"""SELECT count()
                      FROM message
                        LEFT JOIN chat
                            ON chat._id = message.chat_row_id
                        INNER JOIN jid
                            ON jid._id = chat.jid_row_id
                        LEFT JOIN jid jid_group
                            ON jid_group._id = message.sender_jid_row_id
                      WHERE 1=1
                        {f'AND timestamp {filter_date}' if filter_date is not None else ''}
                        {get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")}
                        {get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")}""")
    total_row_number = c.fetchone()[0]
    print(f"Processing messages...(0/{total_row_number})", end="\r")

    try:
        c.execute(f"""SELECT messages.key_remote_jid,
                             messages._id,
                             messages.key_from_me,
                             messages.timestamp,
                             messages.data,
                             messages.status,
                             messages.edit_version,
                             messages.thumb_image,
                             messages.remote_resource,
                             CAST(messages.media_wa_type as INTEGER) as media_wa_type,
                             messages.latitude,
                             messages.longitude,
                             messages_quotes.key_id as quoted,
                             messages.key_id,
                             messages_quotes.data as quoted_data,
                             messages.media_caption,
                             missed_call_logs.video_call,
                             chat.subject as chat_subject,
                             message_system.action_type,
                             message_system_group.is_me_joined,
                             jid_old.raw_string as old_jid,
                             jid_new.raw_string as new_jid,
                             jid_global.type as jid_type,
                             group_concat(receipt_user.receipt_timestamp) as receipt_timestamp,
                             group_concat(messages.received_timestamp) as received_timestamp,
                             group_concat(receipt_user.read_timestamp) as read_timestamp,
                             group_concat(receipt_user.played_timestamp) as played_timestamp,
                             group_concat(messages.read_device_timestamp) as read_device_timestamp
                      FROM messages
                        LEFT JOIN messages_quotes
                            ON messages.quoted_row_id = messages_quotes._id
                        LEFT JOIN missed_call_logs
                            ON messages._id = missed_call_logs.message_row_id
                        INNER JOIN jid jid_global
                            ON messages.key_remote_jid = jid_global.raw_string
                        LEFT JOIN chat
                            ON chat.jid_row_id = jid_global._id
                        LEFT JOIN message_system
                            ON message_system.message_row_id = messages._id
                        LEFT JOIN message_system_group
                            ON message_system_group.message_row_id = messages._id
                        LEFT JOIN message_system_number_change
                            ON message_system_number_change.message_row_id = messages._id
                        LEFT JOIN jid jid_old
                            ON jid_old._id = message_system_number_change.old_jid_row_id
                        LEFT JOIN jid jid_new
                            ON jid_new._id = message_system_number_change.new_jid_row_id
                        LEFT JOIN receipt_user
                            ON receipt_user.message_row_id = messages._id
                      WHERE messages.key_remote_jid <> '-1'
                        {f'AND messages.timestamp {filter_date}' if filter_date is not None else ''}
                        {get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")}
                        {get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "messages.remote_resource"], "jid_global", "android")}
                      GROUP BY messages._id
                      ORDER BY messages.timestamp ASC;"""
        )
    except sqlite3.OperationalError:
        # Legacy table is missing: fall back to the newer schema. Any error
        # raised by this second query propagates to the caller unchanged.
        c.execute(f"""SELECT jid_global.raw_string as key_remote_jid,
                             message._id,
                             message.from_me as key_from_me,
                             message.timestamp,
                             message.text_data as data,
                             message.status,
                             message_future.version as edit_version,
                             message_thumbnail.thumbnail as thumb_image,
                             message_media.file_path as remote_resource,
                             message_location.latitude,
                             message_location.longitude,
                             message_quoted.key_id as quoted,
                             message.key_id,
                             message_quoted.text_data as quoted_data,
                             message.message_type as media_wa_type,
                             jid_group.raw_string as group_sender_jid,
                             chat.subject as chat_subject,
                             missed_call_logs.video_call,
                             message.sender_jid_row_id,
                             message_system.action_type,
                             message_system_group.is_me_joined,
                             jid_old.raw_string as old_jid,
                             jid_new.raw_string as new_jid,
                             jid_global.type as jid_type,
                             group_concat(receipt_user.receipt_timestamp) as receipt_timestamp,
                             group_concat(message.received_timestamp) as received_timestamp,
                             group_concat(receipt_user.read_timestamp) as read_timestamp,
                             group_concat(receipt_user.played_timestamp) as played_timestamp
                      FROM message
                        LEFT JOIN message_quoted
                            ON message_quoted.message_row_id = message._id
                        LEFT JOIN message_location
                            ON message_location.message_row_id = message._id
                        LEFT JOIN message_media
                            ON message_media.message_row_id = message._id
                        LEFT JOIN message_thumbnail
                            ON message_thumbnail.message_row_id = message._id
                        LEFT JOIN message_future
                            ON message_future.message_row_id = message._id
                        LEFT JOIN chat
                            ON chat._id = message.chat_row_id
                        INNER JOIN jid jid_global
                            ON jid_global._id = chat.jid_row_id
                        LEFT JOIN jid jid_group
                            ON jid_group._id = message.sender_jid_row_id
                        LEFT JOIN missed_call_logs
                            ON message._id = missed_call_logs.message_row_id
                        LEFT JOIN message_system
                            ON message_system.message_row_id = message._id
                        LEFT JOIN message_system_group
                            ON message_system_group.message_row_id = message._id
                        LEFT JOIN message_system_number_change
                            ON message_system_number_change.message_row_id = message._id
                        LEFT JOIN jid jid_old
                            ON jid_old._id = message_system_number_change.old_jid_row_id
                        LEFT JOIN jid jid_new
                            ON jid_new._id = message_system_number_change.new_jid_row_id
                        LEFT JOIN receipt_user
                            ON receipt_user.message_row_id = message._id
                      WHERE key_remote_jid <> '-1'
                        {f'AND message.timestamp {filter_date}' if filter_date is not None else ''}
                        {get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")}
                        {get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid_global", "android")}
                      GROUP BY message._id;"""
        )
        table_message = True
    else:
        table_message = False

    i = 0
    content = _fetch_row_retrying(c)
    while content is not None:
        remote_jid = content["key_remote_jid"]
        if remote_jid is None:
            # Skip rows without a chat jid. The cursor must be advanced
            # here, otherwise the loop would spin on the same row forever.
            content = _fetch_row_retrying(c)
            continue
        if remote_jid not in data:
            data[remote_jid] = ChatStore(Device.ANDROID, content["chat_subject"])

        # NOTE(review): if rows are sqlite3.Row, ``in`` iterates column
        # values rather than column names, so this test is effectively
        # always False -- kept as-is to preserve behaviour; confirm intent.
        if "sender_jid_row_id" in content:
            sender_jid_row_id = content["sender_jid_row_id"]
        else:
            sender_jid_row_id = None
        message = Message(
            from_me=not sender_jid_row_id and content["key_from_me"],
            timestamp=content["timestamp"],
            time=content["timestamp"],
            key_id=content["key_id"],
            timezone_offset=timezone_offset
        )

        if isinstance(content["data"], bytes):
            # Binary payloads are rendered as a CyberChef link plus base64.
            message.data = ("The message is binary data and its base64 is "
                '<a href="https://gchq.github.io/CyberChef/#recipe=From_Base64'
                "('A-Za-z0-9%2B/%3D',true,false)Text_Encoding_Brute_Force"
                f"""('Decode')&input={b64encode(b64encode(content["data"])).decode()}">""")
            message.data += b64encode(content["data"]).decode("utf-8") + "</a>"
            message.safe = message.meta = True
            data[remote_jid].add_message(content["_id"], message)
            i += 1
            content = _fetch_row_retrying(c)
            continue

        # Sender name is only resolved for incoming group messages.
        if content["jid_type"] == JidType.GROUP and content["key_from_me"] == 0:
            name = fallback = None
            if table_message:
                if content["sender_jid_row_id"] > 0:
                    name, fallback = _lookup_sender(data, content["group_sender_jid"])
            else:
                name, fallback = _lookup_sender(data, content["remote_resource"])
            message.sender = name or fallback
        else:
            message.sender = None

        if content["quoted"] is not None:
            message.reply = content["quoted"]
            if content["quoted_data"] is not None and len(content["quoted_data"]) > 200:
                message.quoted_data = content["quoted_data"][:201] + "..."
            else:
                message.quoted_data = content["quoted_data"]
        else:
            message.reply = None

        if not table_message and content["media_caption"] is not None:
            # Old schema
            message.caption = content["media_caption"]
        elif table_message and content["media_wa_type"] == 1 and content["data"] is not None:
            # New schema
            message.caption = content["data"]
        else:
            message.caption = None

        if content["status"] == 6:  # 6 = Metadata, otherwise assume a message
            message.meta = True
            if table_message:
                if content["sender_jid_row_id"] > 0:
                    name, fallback = _lookup_sender(data, content["group_sender_jid"])
                else:
                    name, fallback = "You", None
            else:
                _jid = content["remote_resource"]
                if _jid is not None:
                    name, fallback = _lookup_sender(data, _jid)
                else:
                    name, fallback = "You", None
            message.data = determine_metadata(content, name or fallback)
            if isinstance(message.data, str) and "<br>" in message.data:
                message.safe = True
            if message.data is None:
                if content["video_call"] is not None:  # Missed call
                    message.meta = True
                    if content["video_call"] == 1:
                        message.data = "A video call was missed"
                    elif content["video_call"] == 0:
                        message.data = "A voice call was missed"
                elif content["data"] is None and content["thumb_image"] is None:
                    message.meta = True
                    message.data = None
        else:
            # Real message
            message.sticker = content["media_wa_type"] == 20  # Sticker is a message
            message.data = _message_text(content, table_message, message)

        data[remote_jid].add_message(content["_id"], message)
        i += 1
        if i % 1000 == 0:
            print(f"Processing messages...({i}/{total_row_number})", end="\r")
        content = _fetch_row_retrying(c)
    print(f"Processing messages...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def media(db, data, media_folder, filter_date, filter_chat, separate_media=True):
    """Attach media information to messages already stored in *data*.

    For each ``message_media`` row the matching message is flagged as media.
    If the file exists under *media_folder* its path and MIME type are
    recorded (and, with *separate_media*, the file is copied into a per-chat
    ``separated`` folder); otherwise the message is marked as missing media.
    Stored thumbnails are written to ``<media_folder>/thumbnails``.

    The legacy ``messages`` table is tried first and the newer ``message``
    schema is used when SQLite raises ``OperationalError``.
    """
    c = db.cursor()
    try:
        c.execute(f"""SELECT count()
                      FROM message_media
                        INNER JOIN messages
                            ON message_media.message_row_id = messages._id
                        INNER JOIN jid
                            ON messages.key_remote_jid = jid.raw_string
                      WHERE 1=1
                        {f'AND messages.timestamp {filter_date}' if filter_date is not None else ''}
                        {get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")}
                        {get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")}""")
    except sqlite3.OperationalError:
        c.execute(f"""SELECT count()
                      FROM message_media
                        INNER JOIN message
                            ON message_media.message_row_id = message._id
                        LEFT JOIN chat
                            ON chat._id = message.chat_row_id
                        INNER JOIN jid
                            ON jid._id = chat.jid_row_id
                        LEFT JOIN jid jid_group
                            ON jid_group._id = message.sender_jid_row_id
                      WHERE 1=1
                        {f'AND message.timestamp {filter_date}' if filter_date is not None else ''}
                        {get_chat_condition(filter_chat[0], True, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")}
                        {get_chat_condition(filter_chat[1], False, ["jid.raw_string", "jid_group.raw_string"], "jid", "android")}""")
    total_row_number = c.fetchone()[0]
    print(f"\nProcessing media...(0/{total_row_number})", end="\r")
    try:
        c.execute(f"""SELECT messages.key_remote_jid,
                             message_row_id,
                             file_path,
                             message_url,
                             mime_type,
                             media_key,
                             file_hash,
                             thumbnail
                      FROM message_media
                        INNER JOIN messages
                            ON message_media.message_row_id = messages._id
                        LEFT JOIN media_hash_thumbnail
                            ON message_media.file_hash = media_hash_thumbnail.media_hash
                        INNER JOIN jid
                            ON messages.key_remote_jid = jid.raw_string
                      WHERE jid.type <> 7
                        {f'AND messages.timestamp {filter_date}' if filter_date is not None else ''}
                        {get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")}
                        {get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")}
                      ORDER BY messages.key_remote_jid ASC"""
        )
    except sqlite3.OperationalError:
        c.execute(f"""SELECT jid.raw_string as key_remote_jid,
                             message_row_id,
                             file_path,
                             message_url,
                             mime_type,
                             media_key,
                             file_hash,
                             thumbnail
                      FROM message_media
                        INNER JOIN message
                            ON message_media.message_row_id = message._id
                        LEFT JOIN chat
                            ON chat._id = message.chat_row_id
                        INNER JOIN jid
                            ON jid._id = chat.jid_row_id
                        LEFT JOIN media_hash_thumbnail
                            ON message_media.file_hash = media_hash_thumbnail.media_hash
                        LEFT JOIN jid jid_group
                            ON jid_group._id = message.sender_jid_row_id
                      WHERE jid.type <> 7
                        {f'AND message.timestamp {filter_date}' if filter_date is not None else ''}
                        {get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")}
                        {get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")}
                      ORDER BY jid.raw_string ASC"""
        )

    mime_guesser = MimeTypes()
    # mkdir(exist_ok=True) is a no-op when the folder is already present.
    Path(f"{media_folder}/thumbnails").mkdir(parents=True, exist_ok=True)

    for processed, row in enumerate(iter(c.fetchone, None), start=1):
        remote_jid = row["key_remote_jid"]
        source_path = f"{media_folder}/{row['file_path']}"
        message = data[remote_jid].messages[row["message_row_id"]]
        message.media = True

        if not os.path.isfile(source_path):
            message.data = "The media is missing"
            message.mime = "media"
            message.meta = True
        else:
            message.data = source_path
            declared_mime = row["mime_type"]
            if declared_mime is not None:
                message.mime = declared_mime
            else:
                guessed = mime_guesser.guess_type(source_path)[0]
                message.mime = guessed if guessed is not None else "application/octet-stream"
            if separate_media:
                # Folder name: chat name, else sender, else the jid's user part.
                label = data[remote_jid].name or message.sender or remote_jid.split('@')[0]
                chat_display_name = slugify(label, True)
                target_dir = os.path.join(media_folder, "separated", chat_display_name)
                Path(target_dir).mkdir(parents=True, exist_ok=True)
                copied_path = os.path.join(target_dir, source_path.split("/")[-1])
                shutil.copy2(source_path, copied_path)
                message.data = copied_path

        if row["thumbnail"] is not None:
            thumb_path = f"{media_folder}/thumbnails/{b64decode(row['file_hash']).hex()}.png"
            if not os.path.isfile(thumb_path):
                with open(thumb_path, "wb") as thumb_file:
                    thumb_file.write(row["thumbnail"])
            message.thumb = thumb_path

        if processed % 100 == 0:
            print(f"Processing media...({processed}/{total_row_number})", end="\r")
    print(
        f"Processing media...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def vcard(db, data, media_folder, filter_date, filter_chat):
    """Export vCard attachments and link them from their messages.

    Every vCard row is written to ``<media_folder>/vCards/<name>.vcf``
    (existing files are left untouched) and the owning message in *data* is
    replaced by an HTML link to that file, flagged as meta and safe.

    The legacy ``messages_vcards`` table is tried first and the newer
    ``message_vcard`` schema is used when SQLite raises ``OperationalError``.
    """
    cursor = db.cursor()
    try:
        cursor.execute(f"""SELECT message_row_id,
                                  messages.key_remote_jid,
                                  vcard,
                                  messages.media_name
                           FROM messages_vcards
                             INNER JOIN messages
                                 ON messages_vcards.message_row_id = messages._id
                             INNER JOIN jid
                                 ON messages.key_remote_jid = jid.raw_string
                           WHERE 1=1
                             {f'AND messages.timestamp {filter_date}' if filter_date is not None else ''}
                             {get_chat_condition(filter_chat[0], True, ["messages.key_remote_jid", "remote_resource"], "jid", "android")}
                             {get_chat_condition(filter_chat[1], False, ["messages.key_remote_jid", "remote_resource"], "jid", "android")}
                           ORDER BY messages.key_remote_jid ASC;"""
        )
    except sqlite3.OperationalError:
        cursor.execute(f"""SELECT message_row_id,
                                  jid.raw_string as key_remote_jid,
                                  vcard,
                                  message.text_data as media_name
                           FROM message_vcard
                             INNER JOIN message
                                 ON message_vcard.message_row_id = message._id
                             LEFT JOIN chat
                                 ON chat._id = message.chat_row_id
                             INNER JOIN jid
                                 ON jid._id = chat.jid_row_id
                             LEFT JOIN jid jid_group
                                 ON jid_group._id = message.sender_jid_row_id
                           WHERE 1=1
                             {f'AND message.timestamp {filter_date}' if filter_date is not None else ''}
                             {get_chat_condition(filter_chat[0], True, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")}
                             {get_chat_condition(filter_chat[1], False, ["key_remote_jid", "jid_group.raw_string"], "jid", "android")}
                           ORDER BY message.chat_row_id ASC;"""
        )

    records = cursor.fetchall()
    total_row_number = len(records)
    print(f"\nProcessing vCards...(0/{total_row_number})", end="\r")
    vcard_dir = f"{media_folder}/vCards"
    # mkdir(exist_ok=True) is a no-op when the folder is already present.
    Path(vcard_dir).mkdir(parents=True, exist_ok=True)
    for done, record in enumerate(records, start=1):
        media_name = record["media_name"]
        if media_name is None:
            media_name = "Undefined vCard File"
        # File name: alphanumeric characters only, capped at 230 UTF-8 bytes
        # (a character cut in half by the cap is dropped).
        stem = "".join(filter(str.isalnum, media_name))
        stem = stem.encode('utf-8')[:230].decode('utf-8', 'ignore')
        file_path = os.path.join(vcard_dir, f"{stem}.vcf")
        if not os.path.isfile(file_path):
            with open(file_path, "w", encoding="utf-8") as vcf:
                vcf.write(record["vcard"])
        message = data[record["key_remote_jid"]].messages[record["message_row_id"]]
        message.data = "This media include the following vCard file(s):<br>" \
            f'<a href="{htmle(file_path)}">{htmle(media_name)}</a>'
        message.mime = "text/x-vcard"
        message.meta = True
        message.safe = True
        print(f"Processing vCards...({done}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def calls(db, data, timezone_offset, filter_chat):
    """Collect the call log into a synthetic "WhatsApp Calls" chat.

    Each ``call_log`` row becomes a meta ``Message`` describing the call;
    the resulting chat is stored under the key ``"000000000000000"`` in
    *data*. Nothing is added when the call log has no matching rows.
    """
    cursor = db.cursor()
    cursor.execute(f"""SELECT count()
                       FROM call_log
                         INNER JOIN jid
                             ON call_log.jid_row_id = jid._id
                         LEFT JOIN chat
                             ON call_log.jid_row_id = chat.jid_row_id
                       WHERE 1=1
                         {get_chat_condition(filter_chat[0], True, ["jid.raw_string"])}
                         {get_chat_condition(filter_chat[1], False, ["jid.raw_string"])}""")
    total_row_number = cursor.fetchone()[0]
    if total_row_number == 0:
        return
    print(f"\nProcessing calls...({total_row_number})", end="\r")
    cursor.execute(f"""SELECT call_log._id,
                              jid.raw_string,
                              from_me,
                              call_id,
                              timestamp,
                              video_call,
                              duration,
                              call_result,
                              bytes_transferred,
                              chat.subject as chat_subject
                       FROM call_log
                         INNER JOIN jid
                             ON call_log.jid_row_id = jid._id
                         LEFT JOIN chat
                             ON call_log.jid_row_id = chat.jid_row_id
                       WHERE 1=1
                         {get_chat_condition(filter_chat[0], True, ["jid.raw_string"])}
                         {get_chat_condition(filter_chat[1], False, ["jid.raw_string"])}"""
    )
    chat = ChatStore(Device.ANDROID, "WhatsApp Calls")
    for row in iter(cursor.fetchone, None):
        call = Message(
            from_me=row["from_me"],
            timestamp=row["timestamp"],
            time=row["timestamp"],
            key_id=row["call_id"],
            timezone_offset=timezone_offset
        )
        _jid = row["raw_string"]
        # Prefer the known contact name, then the chat subject, then the
        # user part of the jid.
        if _jid in data:
            name = data[_jid].name
        else:
            name = row["chat_subject"] or None
        fallback = _jid.split('@')[0] if _jid is not None and "@" in _jid else None
        call.sender = name or fallback
        call.meta = True

        kind = 'video' if row['video_call'] else 'voice'
        direction = 'to' if call.from_me else 'from'
        headline = f"A {kind} call {direction} {call.sender} was "

        result = row['call_result']
        if result in (0, 4, 7):
            outcome = "cancelled." if call.from_me else "missed."
        elif result == 2:
            outcome = "not answered." if call.from_me else "missed."
        elif result == 3:
            outcome = "unavailable."
        elif result == 5:
            call_time = convert_time_unit(row['duration'])
            call_bytes = bytes_to_readable(row['bytes_transferred'])
            outcome = (
                f"initiated and lasted for {call_time} "
                f"with {call_bytes} data transferred."
            )
        else:
            outcome = "in an unknown state."
        call.data = headline + outcome
        chat.add_message(row["_id"], call)
    data["000000000000000"] = chat
|
||||
|
||||
|
||||
def create_html(
        data,
        output_folder,
        template=None,
        embedded=False,
        offline_static=False,
        maximum_size=None,
        no_avatar=False,
        filter_empty=True
    ):
    """Render every chat in *data* to HTML files under *output_folder*.

    Args:
        data: Mapping of contact id -> chat object.
        output_folder: Destination directory; created (including missing
            parents) when absent.
        template: Forwarded to ``setup_template``.
        embedded: Accepted for interface compatibility; not used here.
        offline_static: Forwarded to ``get_status_location``.
        maximum_size: ``None`` writes one file per chat. Otherwise chats are
            split into numbered pages once the estimated page size exceeds
            this value; ``0`` selects ``MAX_SIZE``.
        no_avatar: Forwarded to ``setup_template``.
        filter_empty: Skip chats for which ``chat_is_empty`` is true.
    """
    template = setup_template(template, no_avatar)

    total_row_number = len(data)
    print(f"\nGenerating chats...(0/{total_row_number})", end="\r")

    # makedirs also creates missing parent directories and tolerates an
    # existing folder, matching what create_txt does for its output folder.
    os.makedirs(output_folder, exist_ok=True)

    w3css = get_status_location(output_folder, offline_static)

    if maximum_size == 0:
        maximum_size = MAX_SIZE

    for current, contact in enumerate(data):
        chat = data[contact]
        if filter_empty and chat_is_empty(chat):
            continue
        safe_file_name, name = get_file_name(contact, chat)

        if maximum_size is not None:
            current_size = 0
            current_page = 1
            render_box = []
            for message in chat.get_messages():
                if message.data is not None and not message.meta and not message.media:
                    current_size += len(message.data) + ROW_SIZE
                else:
                    current_size += ROW_SIZE + 100  # Assume media and meta HTML are 100 bytes
                if current_size > maximum_size:
                    # Page is full: flush it with a link to the next page and
                    # start the next page with the current message.
                    output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
                    rendering(
                        output_file_name,
                        template,
                        name,
                        render_box,
                        contact,
                        w3css,
                        f"{safe_file_name}-{current_page + 1}.html",
                        chat
                    )
                    render_box = [message]
                    current_size = 0
                    current_page += 1
                else:
                    render_box.append(message)
            # Write the final page once all messages are consumed. Doing this
            # after the loop guarantees the last page exists even when the
            # very last message is the one that overflowed the previous page.
            if render_box:
                if current_page == 1:
                    output_file_name = f"{output_folder}/{safe_file_name}.html"
                else:
                    output_file_name = f"{output_folder}/{safe_file_name}-{current_page}.html"
                rendering(
                    output_file_name,
                    template,
                    name,
                    render_box,
                    contact,
                    w3css,
                    False,
                    chat
                )
        else:
            output_file_name = f"{output_folder}/{safe_file_name}.html"
            rendering(
                output_file_name,
                template,
                name,
                chat.get_messages(),
                contact,
                w3css,
                False,
                chat
            )
        if current % 10 == 0:
            print(f"Generating chats...({current}/{total_row_number})", end="\r")

    print(f"Generating chats...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def create_txt(data, output):
    """Write every chat in *data* as a plain-text transcript under *output*.

    One ``<contact>.txt`` file is produced per chat, named after the chat
    name (with ``/`` removed) or, when the chat has no name, after its key
    (with ``+`` removed). Metadata messages are skipped unless they are
    missing-media placeholders. ``<br>`` markers become line breaks that are
    indented to line up with the text after the ``[date time] name: `` prefix.
    """
    os.makedirs(output, exist_ok=True)
    for jik, chat in data.items():
        contact = jik.replace('+', '') if chat.name is None else chat.name.replace('/', '')
        target = os.path.join(output, f"{contact}.txt")
        with open(target, "w", encoding="utf8") as handle:
            for message in chat.messages.values():
                day = datetime.fromtimestamp(message.timestamp).date()
                if message.meta and message.mime != "media":
                    continue  # Skip any metadata in text format
                speaker = "You" if message.from_me else (message.sender or contact)
                prefix = f"[{day} {message.time}] {speaker}: "
                # Continuation lines are padded to the width of the prefix.
                continuation = "\n" + " " * len(prefix)
                if message.media and ("/" in message.mime or message.mime == "media"):
                    if message.data == "The media is missing":
                        body = "<The media is missing>"
                    else:
                        body = f"<media file in {message.data}>"
                elif message.data is None:
                    body = ""
                else:
                    body = message.data.replace('<br>', continuation)
                if message.caption is not None:
                    body += continuation + message.caption.replace('<br>', continuation)
                handle.write(f"{prefix}{body}\n")
|
||||
|
||||
292
Whatsapp_Chat_Exporter/bplist.py
Normal file
292
Whatsapp_Chat_Exporter/bplist.py
Normal file
@@ -0,0 +1,292 @@
|
||||
#################################################################################
|
||||
# Copyright (C) 2009-2011 Vladimir "Farcaller" Pouzanov <farcaller@gmail.com> #
|
||||
# #
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy #
|
||||
# of this software and associated documentation files (the "Software"), to deal #
|
||||
# in the Software without restriction, including without limitation the rights #
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell #
|
||||
# copies of the Software, and to permit persons to whom the Software is #
|
||||
# furnished to do so, subject to the following conditions: #
|
||||
# #
|
||||
# The above copyright notice and this permission notice shall be included in #
|
||||
# all copies or substantial portions of the Software. #
|
||||
# #
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, #
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN #
|
||||
# THE SOFTWARE. #
|
||||
#################################################################################
|
||||
|
||||
import struct
|
||||
import codecs
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
class BPListWriter(object):
    '''Skeleton of a binary-plist writer.

    Serialization is not implemented: binary() only produces the magic
    header and nothing in this class ever assigns self.bplist, so write()
    raises unless a caller sets that attribute.
    '''

    def __init__(self, objects):
        self.objects = objects
        self.bplist = ""

    def binary(self):
        '''binary -> string

        Generates bplist (currently just the 'bplist00' header, which is
        also stored on self.data)
        '''
        header = 'bplist00'

        # TODO: flatten objects and count max length size

        # TODO: write objects and save offsets

        # TODO: write offsets

        # TODO: write metadata

        self.data = header
        return header

    def write(self, filename):
        '''

        Writes bplist to file; raises Exception while self.bplist is empty
        '''
        if self.bplist == "":
            raise Exception('BPlist not yet generated')
        # TODO: save self.bplist to file
|
||||
|
||||
class BPListReader(object):
|
||||
def __init__(self, s):
|
||||
self.data = s
|
||||
self.objects = []
|
||||
self.resolved = {}
|
||||
|
||||
def __unpackIntStruct(self, sz, s):
|
||||
'''__unpackIntStruct(size, string) -> int
|
||||
|
||||
Unpacks the integer of given size (1, 2 or 4 bytes) from string
|
||||
'''
|
||||
if sz == 1:
|
||||
ot = '!B'
|
||||
elif sz == 2:
|
||||
ot = '!H'
|
||||
elif sz == 4:
|
||||
ot = '!I'
|
||||
elif sz == 8:
|
||||
ot = '!Q'
|
||||
else:
|
||||
raise Exception('int unpack size '+str(sz)+' unsupported')
|
||||
return struct.unpack(ot, s)[0]
|
||||
|
||||
def __unpackInt(self, offset):
|
||||
'''__unpackInt(offset) -> int
|
||||
|
||||
Unpacks int field from plist at given offset
|
||||
'''
|
||||
return self.__unpackIntMeta(offset)[1]
|
||||
|
||||
def __unpackIntMeta(self, offset):
|
||||
'''__unpackIntMeta(offset) -> (size, int)
|
||||
|
||||
Unpacks int field from plist at given offset and returns its size and value
|
||||
'''
|
||||
obj_header = self.data[offset]
|
||||
obj_type, obj_info = (obj_header & 0xF0), (obj_header & 0x0F)
|
||||
int_sz = 2**obj_info
|
||||
return int_sz, self.__unpackIntStruct(int_sz, self.data[offset+1:offset+1+int_sz])
|
||||
|
||||
def __resolveIntSize(self, obj_info, offset):
|
||||
'''__resolveIntSize(obj_info, offset) -> (count, offset)
|
||||
|
||||
Calculates count of objref* array entries and returns count and offset to first element
|
||||
'''
|
||||
if obj_info == 0x0F:
|
||||
ofs, obj_count = self.__unpackIntMeta(offset+1)
|
||||
objref = offset+2+ofs
|
||||
else:
|
||||
obj_count = obj_info
|
||||
objref = offset+1
|
||||
return obj_count, objref
|
||||
|
||||
def __unpackFloatStruct(self, sz, s):
|
||||
'''__unpackFloatStruct(size, string) -> float
|
||||
|
||||
Unpacks the float of given size (4 or 8 bytes) from string
|
||||
'''
|
||||
if sz == 4:
|
||||
ot = '!f'
|
||||
elif sz == 8:
|
||||
ot = '!d'
|
||||
else:
|
||||
raise Exception('float unpack size '+str(sz)+' unsupported')
|
||||
return struct.unpack(ot, s)[0]
|
||||
|
||||
def __unpackFloat(self, offset):
|
||||
'''__unpackFloat(offset) -> float
|
||||
|
||||
Unpacks float field from plist at given offset
|
||||
'''
|
||||
obj_header = self.data[offset]
|
||||
obj_type, obj_info = (obj_header & 0xF0), (obj_header & 0x0F)
|
||||
int_sz = 2**obj_info
|
||||
return int_sz, self.__unpackFloatStruct(int_sz, self.data[offset+1:offset+1+int_sz])
|
||||
|
||||
def __unpackDate(self, offset):
|
||||
td = int(struct.unpack(">d", self.data[offset+1:offset+9])[0])
|
||||
return datetime(year=2001,month=1,day=1) + timedelta(seconds=td)
|
||||
|
||||
def __unpackItem(self, offset):
|
||||
'''__unpackItem(offset)
|
||||
|
||||
Unpacks and returns an item from plist
|
||||
'''
|
||||
obj_header = self.data[offset]
|
||||
obj_type, obj_info = (obj_header & 0xF0), (obj_header & 0x0F)
|
||||
if obj_type == 0x00:
|
||||
if obj_info == 0x00: # null 0000 0000
|
||||
return None
|
||||
elif obj_info == 0x08: # bool 0000 1000 // false
|
||||
return False
|
||||
elif obj_info == 0x09: # bool 0000 1001 // true
|
||||
return True
|
||||
elif obj_info == 0x0F: # fill 0000 1111 // fill byte
|
||||
raise Exception("0x0F Not Implemented") # this is really pad byte, FIXME
|
||||
else:
|
||||
raise Exception('unpack item type '+str(obj_header)+' at '+str(offset)+ 'failed')
|
||||
elif obj_type == 0x10: # int 0001 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
|
||||
return self.__unpackInt(offset)
|
||||
elif obj_type == 0x20: # real 0010 nnnn ... // # of bytes is 2^nnnn, big-endian bytes
|
||||
return self.__unpackFloat(offset)
|
||||
elif obj_type == 0x30: # date 0011 0011 ... // 8 byte float follows, big-endian bytes
|
||||
return self.__unpackDate(offset)
|
||||
elif obj_type == 0x40: # data 0100 nnnn [int] ... // nnnn is number of bytes unless 1111 then int count follows, followed by bytes
|
||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||
return self.data[objref:objref+obj_count] # XXX: we return data as str
|
||||
elif obj_type == 0x50: # string 0101 nnnn [int] ... // ASCII string, nnnn is # of chars, else 1111 then int count, then bytes
|
||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||
return self.data[objref:objref+obj_count]
|
||||
elif obj_type == 0x60: # string 0110 nnnn [int] ... // Unicode string, nnnn is # of chars, else 1111 then int count, then big-endian 2-byte uint16_t
|
||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||
return self.data[objref:objref+obj_count*2].decode('utf-16be')
|
||||
elif obj_type == 0x80: # uid 1000 nnnn ... // nnnn+1 is # of bytes
|
||||
# FIXME: Accept as a string for now
|
||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||
return self.data[objref:objref+obj_count]
|
||||
elif obj_type == 0xA0: # array 1010 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
|
||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||
arr = []
|
||||
for i in range(obj_count):
|
||||
arr.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
|
||||
return arr
|
||||
elif obj_type == 0xC0: # set 1100 nnnn [int] objref* // nnnn is count, unless '1111', then int count follows
|
||||
# XXX: not serializable via apple implementation
|
||||
raise Exception("0xC0 Not Implemented") # FIXME: implement
|
||||
elif obj_type == 0xD0: # dict 1101 nnnn [int] keyref* objref* // nnnn is count, unless '1111', then int count follows
|
||||
obj_count, objref = self.__resolveIntSize(obj_info, offset)
|
||||
keys = []
|
||||
for i in range(obj_count):
|
||||
keys.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
|
||||
values = []
|
||||
objref += obj_count*self.object_ref_size
|
||||
for i in range(obj_count):
|
||||
values.append(self.__unpackIntStruct(self.object_ref_size, self.data[objref+i*self.object_ref_size:objref+i*self.object_ref_size+self.object_ref_size]))
|
||||
dic = {}
|
||||
for i in range(obj_count):
|
||||
dic[keys[i]] = values[i]
|
||||
return dic
|
||||
else:
|
||||
raise Exception('don\'t know how to unpack obj type '+hex(obj_type)+' at '+str(offset))
|
||||
|
||||
def __resolveObject(self, idx):
|
||||
try:
|
||||
return self.resolved[idx]
|
||||
except KeyError:
|
||||
obj = self.objects[idx]
|
||||
if type(obj) == list:
|
||||
newArr = []
|
||||
for i in obj:
|
||||
newArr.append(self.__resolveObject(i))
|
||||
self.resolved[idx] = newArr
|
||||
return newArr
|
||||
if type(obj) == dict:
|
||||
newDic = {}
|
||||
for k,v in obj.items():
|
||||
key_resolved = self.__resolveObject(k)
|
||||
if isinstance(key_resolved, str):
|
||||
rk = key_resolved
|
||||
else:
|
||||
rk = codecs.decode(key_resolved, "utf-8")
|
||||
rv = self.__resolveObject(v)
|
||||
newDic[rk] = rv
|
||||
self.resolved[idx] = newDic
|
||||
return newDic
|
||||
else:
|
||||
self.resolved[idx] = obj
|
||||
return obj
|
||||
|
||||
def parse(self):
|
||||
# read header
|
||||
if self.data[:8] != b'bplist00':
|
||||
raise Exception('Bad magic')
|
||||
|
||||
# read trailer
|
||||
self.offset_size, self.object_ref_size, self.number_of_objects, self.top_object, self.table_offset = struct.unpack('!6xBB4xI4xI4xI', self.data[-32:])
|
||||
#print "** plist offset_size:",self.offset_size,"objref_size:",self.object_ref_size,"num_objs:",self.number_of_objects,"top:",self.top_object,"table_ofs:",self.table_offset
|
||||
|
||||
# read offset table
|
||||
self.offset_table = self.data[self.table_offset:-32]
|
||||
self.offsets = []
|
||||
ot = self.offset_table
|
||||
for i in range(self.number_of_objects):
|
||||
offset_entry = ot[:self.offset_size]
|
||||
ot = ot[self.offset_size:]
|
||||
self.offsets.append(self.__unpackIntStruct(self.offset_size, offset_entry))
|
||||
#print "** plist offsets:",self.offsets
|
||||
|
||||
# read object table
|
||||
self.objects = []
|
||||
k = 0
|
||||
for i in self.offsets:
|
||||
obj = self.__unpackItem(i)
|
||||
#print "** plist unpacked",k,type(obj),obj,"at",i
|
||||
k += 1
|
||||
self.objects.append(obj)
|
||||
|
||||
# rebuild object tree
|
||||
#for i in range(len(self.objects)):
|
||||
# self.__resolveObject(i)
|
||||
|
||||
# return root object
|
||||
return self.__resolveObject(self.top_object)
|
||||
|
||||
@classmethod
|
||||
def plistWithString(cls, s):
|
||||
parser = cls(s)
|
||||
return parser.parse()
|
||||
|
||||
# helpers for testing
|
||||
def plist(obj):
    # Test helper: serialize obj to a binary plist using the Foundation
    # bindings (presumably PyObjC; only importable where that is installed)
    from Foundation import NSPropertyListSerialization, NSPropertyListBinaryFormat_v1_0
    serialized = NSPropertyListSerialization.dataWithPropertyList_format_options_error_(
        obj, NSPropertyListBinaryFormat_v1_0, 0, None)
    raw = serialized.bytes()
    return str(raw)
|
||||
|
||||
def unplist(s):
    # Test helper: decode plist data with the Foundation bindings so the
    # result can be compared with BPListReader's output
    from Foundation import NSData, NSPropertyListSerialization
    wrapped = NSData.dataWithBytes_length_(s, len(s))
    decoded = NSPropertyListSerialization.propertyListWithData_options_format_error_(
        wrapped, 0, None, None)
    return decoded
|
||||
|
||||
if __name__ == "__main__":
    # Command-line use: convert the binary plist given as first argument
    # into "<input path>.json" next to it
    import os
    import sys
    import json
    file_path = sys.argv[1]

    with open(file_path, "rb") as fp:
        data = fp.read()

    out = BPListReader(data).parse()

    with open(file_path + ".json", "w") as fp:
        # json.dump needs the target file object as its second positional
        # argument; without it the call raised TypeError
        json.dump(out, fp, indent=4)
|
||||
108
Whatsapp_Chat_Exporter/data_model.py
Normal file
108
Whatsapp_Chat_Exporter/data_model.py
Normal file
@@ -0,0 +1,108 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import os
|
||||
from datetime import datetime, tzinfo, timedelta
|
||||
from typing import Union
|
||||
|
||||
|
||||
class TimeZone(tzinfo):
    """Fixed-offset time zone described by a number of hours relative to UTC."""

    def __init__(self, offset):
        # offset is handed unchanged to timedelta(hours=...) in utcoffset()
        self.offset = offset

    def utcoffset(self, dt):
        """Return the configured offset as a timedelta; dt is ignored."""
        hours = self.offset
        return timedelta(hours=hours)

    def dst(self, dt):
        """Return a zero timedelta; no daylight saving adjustment is applied."""
        return timedelta()
|
||||
|
||||
|
||||
class ChatStore():
    """Container for one chat: its metadata plus the messages keyed by id."""

    def __init__(self, type, name=None, media=None):
        """Create an empty chat.

        type is stored as-is and compared against Device members when media
        is given; name must be a str or None (TypeError otherwise); media is
        the media root used to locate the owner's avatar on iOS.
        """
        if not (name is None or isinstance(name, str)):
            raise TypeError("Name must be a string or None")
        self.name = name
        self.messages = {}
        self.type = type
        self.my_avatar = None
        if media is not None:
            # imported lazily, exactly as in the original constructor
            from Whatsapp_Chat_Exporter.utility import Device
            if self.type == Device.IOS:
                self.my_avatar = os.path.join(media, "Media/Profile/Photo.jpg")
            # TODO: Add Android support (avatar stays None for every other type)
        self.their_avatar = None
        self.their_avatar_thumb = None
        self.status = None
        self.media_base = ""

    def add_message(self, id, message):
        """Store message under id; raises TypeError unless it is a Message."""
        if isinstance(message, Message):
            self.messages[id] = message
            return
        raise TypeError("message must be a Message object")

    def delete_message(self, id):
        """Remove the message stored under id; unknown ids are ignored."""
        self.messages.pop(id, None)

    def to_json(self):
        """Return a plain-dict snapshot of the chat with every message serialized."""
        serialized_msgs = {}
        for msg_id, msg in self.messages.items():
            serialized_msgs[msg_id] = msg.to_json()
        return {
            'name': self.name,
            'type': self.type,
            'my_avatar': self.my_avatar,
            'their_avatar': self.their_avatar,
            'their_avatar_thumb': self.their_avatar_thumb,
            'status': self.status,
            'messages': serialized_msgs
        }

    def get_last_message(self):
        """Return the most recently inserted message; IndexError when the chat is empty."""
        return list(self.messages.values())[-1]

    def get_messages(self):
        """Return a live view of the stored messages in insertion order."""
        return self.messages.values()
|
||||
|
||||
|
||||
class Message():
    """One chat message with its content, metadata and optional extras."""

    # attributes exported by to_json(), in output order
    _JSON_FIELDS = (
        'from_me', 'timestamp', 'time', 'media', 'key_id', 'meta', 'data',
        'sender', 'safe', 'mime', 'reply', 'quoted_data', 'caption', 'thumb',
        'sticker',
    )

    def __init__(self, from_me: Union[bool,int], timestamp: int, time: Union[int,float,str], key_id: int, timezone_offset: int = 0):
        """Create a message.

        timestamp values above 9999999999 are divided by 1000 before being
        stored. A numeric time is replaced by the "%H:%M" rendering of the
        stored timestamp in a TimeZone(timezone_offset); a str time is kept
        verbatim; anything else raises TypeError.
        """
        self.from_me = bool(from_me)
        if timestamp > 9999999999:
            self.timestamp = timestamp / 1000
        else:
            self.timestamp = timestamp
        if isinstance(time, (int, float)):
            zone = TimeZone(timezone_offset)
            self.time = datetime.fromtimestamp(self.timestamp, zone).strftime("%H:%M")
        elif isinstance(time, str):
            self.time = time
        else:
            raise TypeError("Time must be a string or number")
        self.media = False
        self.key_id = key_id
        self.meta = False
        self.data = None
        self.sender = None
        self.safe = False
        self.mime = None
        # Extra
        self.reply = None
        self.quoted_data = None
        self.caption = None
        self.thumb = None # Android specific
        self.sticker = False

    def to_json(self):
        """Return the message as a plain dict of its exported attributes."""
        return {field: getattr(self, field) for field in self._JSON_FIELDS}
|
||||
92
Whatsapp_Chat_Exporter/exported_handler.py
Normal file
92
Whatsapp_Chat_Exporter/exported_handler.py
Normal file
@@ -0,0 +1,92 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import os
|
||||
from datetime import datetime
|
||||
from mimetypes import MimeTypes
|
||||
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
|
||||
from Whatsapp_Chat_Exporter.utility import Device
|
||||
|
||||
|
||||
def messages(path, data, assume_first_as_me=False):
    """Extracts messages from the exported file

    Reads a chat export text file line by line and stores the result in
    data["ExportedChat"] as a ChatStore whose messages are keyed by the
    zero-based line index.

    A line containing " - " starts a new entry ("<date>, <time> - <rest>").
    If <rest> has no colon it is a system notice and becomes a meta message
    of its own; otherwise it is "<sender>: <text>". Any other line continues
    the closest earlier message.

    Args:
        path: path of the exported text file; attachments are looked up in
            the same directory.
        data: dict receiving the "ExportedChat" entry.
        assume_first_as_me: when true the first sender seen is taken to be
            the owner instead of asking on stdin.

    Returns:
        The data dict that was passed in.

    Raises:
        ValueError: if the text before " - " does not match
            "%d/%m/%Y, %H:%M".
    """
    with open(path, "r", encoding="utf8") as file:
        you = ""
        data["ExportedChat"] = ChatStore(Device.EXPORTED)
        chat = data["ExportedChat"]
        total_row_number = len(file.readlines())
        file.seek(0)
        mime = MimeTypes()
        for index, line in enumerate(file):
            # partition keeps everything after the first " - " even when the
            # timestamp text happens to reappear inside the message
            time, separator, rest = line.partition(" - ")
            if separator:
                timestamp = datetime.strptime(time, "%d/%m/%Y, %H:%M").timestamp()
                clock = time.split(", ")[1].strip()
                if ":" not in rest:
                    # System notice: give it a Message of its own instead of
                    # overwriting the previous message (or failing with
                    # NameError when it is the first line of the file)
                    msg = Message(False, timestamp, clock, index)
                    msg.data = rest
                    msg.meta = True
                else:
                    name, _, message = rest.partition(":")
                    message = message.strip()
                    if you == "":
                        if chat.name is None:
                            if not assume_first_as_me:
                                while True:
                                    ans = input(f"Is '{name}' you? (Y/N)").lower()
                                    if ans == "y":
                                        you = name
                                        break
                                    elif ans == "n":
                                        chat.name = name
                                        break
                            else:
                                you = name
                        else:
                            if name != chat.name:
                                you = name
                    elif chat.name is None:
                        if name != you:
                            chat.name = name
                    msg = Message(you == name, timestamp, clock, index)
                    if "<Media omitted>" in message:
                        msg.data = "The media is omitted in the chat"
                        msg.mime = "media"
                        msg.meta = True
                    elif "(file attached)" in message:
                        msg.media = True
                        file_path = os.path.join(os.path.dirname(path), message.split("(file attached)")[0].strip())
                        if os.path.isfile(file_path):
                            msg.data = file_path
                            guess = mime.guess_type(file_path)[0]
                            if guess is not None:
                                msg.mime = guess
                            else:
                                msg.mime = "application/octet-stream"
                        else:
                            msg.data = "The media is missing"
                            msg.mime = "media"
                            msg.meta = True
                    else:
                        # chain the replacements so the second one no longer
                        # throws away the result of the first
                        msg.data = message.replace("\r\n", "<br>").replace("\n", "<br>")
                chat.add_message(index, msg)
            else:
                # Continuation line: attach it to the closest earlier message.
                # Stop at index 0 so a continuation that precedes every
                # message is skipped instead of looping forever.
                lookback = index - 1
                while lookback >= 0 and lookback not in chat.messages:
                    lookback -= 1
                if lookback >= 0:
                    msg = chat.messages[lookback]
                    if msg.media:
                        msg.caption = line.strip()
                    else:
                        msg.data += "<br>" + line.strip()

            if index % 1000 == 0:
                print(f"Processing messages & media...({index}/{total_row_number})", end="\r")
        print(f"Processing messages & media...({total_row_number}/{total_row_number})", end="\r")
    return data
|
||||
@@ -1,429 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import sqlite3
|
||||
import json
|
||||
import jinja2
|
||||
import os
|
||||
import requests
|
||||
import shutil
|
||||
import re
|
||||
import pkgutil
|
||||
from pathlib import Path
|
||||
from bleach import clean as sanitize
|
||||
from markupsafe import Markup
|
||||
from datetime import datetime
|
||||
from mimetypes import MimeTypes
|
||||
try:
|
||||
import zlib
|
||||
from Crypto.Cipher import AES
|
||||
except ModuleNotFoundError:
|
||||
support_backup = False
|
||||
else:
|
||||
support_backup = True
|
||||
|
||||
|
||||
def sanitize_except(html):
    """Clean html with bleach, keeping only <br> tags, and wrap the result in Markup."""
    cleaned = sanitize(html, tags=["br"])
    return Markup(cleaned)
|
||||
|
||||
|
||||
def determine_day(last, current):
    """Return the local calendar date of current when it differs from that of last, else None.

    Both arguments are POSIX timestamps converted with datetime.fromtimestamp.
    """
    previous_day, current_day = (
        datetime.fromtimestamp(stamp).date() for stamp in (last, current)
    )
    return None if previous_day == current_day else current_day
|
||||
|
||||
|
||||
def decrypt_backup(database, key, output, crypt14=True):
    """Decrypt a crypt12/crypt14 backup and write the SQLite database to output.

    Args:
        database: raw bytes of the encrypted backup.
        key: raw bytes of the key file; must be exactly 158 bytes.
        output: path the decrypted database is written to.
        crypt14: selects the crypt14 layout when true, crypt12 otherwise.

    Returns:
        False when the optional crypto dependencies are unavailable, True
        after the database has been written.

    Raises:
        ValueError: wrong key length, backup too short, signature mismatch
            between key and backup, or plaintext that is not SQLite.
    """
    if not support_backup:
        return False
    if len(key) != 158:
        raise ValueError("The key file must be 158 bytes")

    # Per format: name, minimum length, and where signature, IV and
    # ciphertext live inside the backup
    if crypt14:
        label, minimum = "crypt14", 191
        signature_at, iv_at, body_at = slice(15, 47), slice(67, 83), slice(191, None)
    else:
        label, minimum = "crypt12", 67
        signature_at, iv_at, body_at = slice(3, 35), slice(51, 67), slice(67, -20)
    if len(database) < minimum:
        raise ValueError(f"The {label} file must be at least {minimum} bytes")

    if key[30:62] != database[signature_at]:
        raise ValueError("The signature of key file and backup file mismatch")

    cipher = AES.new(key[126:], AES.MODE_GCM, database[iv_at])
    db = zlib.decompress(cipher.decrypt(database[body_at]))
    if db[0:6].upper() != b"SQLITE":
        raise ValueError("The plaintext is not a SQLite database. Did you use the key to encrypt something...")
    with open(output, "wb") as f:
        f.write(db)
    return True
|
||||
|
||||
|
||||
def contacts(db, data):
    """Load every row of wa_contacts into data.

    Each jid is mapped to {"name": display_name, "messages": {}}; an existing
    entry with the same jid is replaced.
    """
    # Get contacts
    cursor = db.cursor()
    cursor.execute("""SELECT count() FROM wa_contacts""")
    (total_row_number,) = cursor.fetchone()
    print(f"Gathering contacts...({total_row_number})")

    cursor.execute("""SELECT jid, display_name FROM wa_contacts; """)
    for jid, display_name in cursor:
        data[jid] = {"name": display_name, "messages": {}}
|
||||
|
||||
|
||||
def messages(db, data):
    """Load the message history from the messages table into data.

    For every row an entry is stored at data[jid]["messages"][row id] with
    the keys from_me, timestamp, time, media, key_id, meta, data, sender,
    reply, caption and, for replies only, quoted_data. Chats missing from
    data are created with name None. Some status-6 (metadata) rows carry
    nothing to show and are removed again.

    Args:
        db: sqlite3 connection to the message database.
        data: dict of chats, modified in place.
    """
    # Get message history
    c = db.cursor()
    c.execute("""SELECT count() FROM messages""")
    total_row_number = c.fetchone()[0]
    print(f"Gathering messages...(0/{total_row_number})", end="\r")

    phone_number_re = re.compile(r"[0-9]+@s.whatsapp.net")
    c.execute("""SELECT messages.key_remote_jid,
                        messages._id,
                        messages.key_from_me,
                        messages.timestamp,
                        messages.data,
                        messages.status,
                        messages.edit_version,
                        messages.thumb_image,
                        messages.remote_resource,
                        messages.media_wa_type,
                        messages.latitude,
                        messages.longitude,
                        messages_quotes.key_id as quoted,
                        messages.key_id,
                        messages_quotes.data,
                        messages.media_caption
                 FROM messages
                    LEFT JOIN messages_quotes
                        ON messages.quoted_row_id = messages_quotes._id;""")
    i = 0
    for content in c:
        (jid, row_id, from_me, timestamp, text, status, edit_version,
         thumb_image, remote_resource, media_type, latitude, longitude,
         quoted, key_id, quoted_data, caption) = content

        if jid not in data:
            data[jid] = {"name": None, "messages": {}}
        chat = data[jid]["messages"]
        entry = {
            "from_me": bool(from_me),
            "timestamp": timestamp/1000,
            "time": datetime.fromtimestamp(timestamp/1000).strftime("%H:%M"),
            "media": False,
            "key_id": key_id,
            "meta": False,
            "data": None
        }
        chat[row_id] = entry

        is_group = "-" in jid
        if is_group and from_me == 0:
            # Incoming group message: prefer the contact name and fall back
            # to the number in front of "@". The fallback is computed even
            # when the sender is not a known contact; previously such senders
            # ended up as None.
            name = fallback = None
            if remote_resource in data:
                name = data[remote_resource]["name"]
            if remote_resource is not None and "@" in remote_resource:
                fallback = remote_resource.split('@')[0]
            entry["sender"] = name or fallback
        else:
            entry["sender"] = None

        if quoted is not None:
            entry["reply"] = quoted
            entry["quoted_data"] = quoted_data
        else:
            entry["reply"] = None

        entry["caption"] = caption

        if status == 6:
            if is_group:
                # Is Group
                if text is not None:
                    try:
                        int(text)
                    except ValueError:
                        entry["data"] = f"The group name changed to {text}"
                        entry["meta"] = True
                    else:
                        # purely numeric payloads are not shown
                        del chat[row_id]
                elif thumb_image is not None:
                    # Start from None so an unrecognised payload can never
                    # pick up the text of a previously processed row
                    msg = None
                    if b"\x00\x00\x01\x74\x00\x1A" in thumb_image:
                        # Add user
                        added = phone_number_re.search(
                            thumb_image.decode("unicode_escape"))[0]
                        if added in data:
                            name_right = data[added]["name"]
                        else:
                            name_right = added.split('@')[0]
                        if remote_resource is not None:
                            if remote_resource in data:
                                name_left = data[remote_resource]["name"]
                            else:
                                name_left = remote_resource.split('@')[0]
                            msg = f"{name_left} added {name_right or 'You'}"
                        else:
                            msg = f"Added {name_right or 'You'}"
                    elif b"\xac\xed\x00\x05\x74\x00" in thumb_image:
                        # Changed number
                        original = remote_resource.split('@')[0]
                        changed = thumb_image[7:].decode().split('@')[0]
                        msg = f"{original} changed to {changed}"
                    entry["data"] = msg
                    entry["meta"] = True
                else:
                    # neither text nor payload: nothing to display
                    del chat[row_id]
            else:
                # Private chat
                if text is None and thumb_image is None:
                    del chat[row_id]
        else:
            # A deleted message has edit_version 7 together with status 5
            # (sent by me) or status 0 (received)
            deleted_status = 5 if from_me == 1 else 0
            if status == deleted_status and edit_version == 7:
                msg = "Message deleted"
                entry["meta"] = True
            elif media_type == "5":
                msg = f"Location shared: {latitude, longitude}"
                entry["meta"] = True
            else:
                msg = text
                if msg is not None:
                    msg = msg.replace("\r\n", "<br>").replace("\n", "<br>")
            entry["data"] = msg

        i += 1
        if i % 1000 == 0:
            print(f"Gathering messages...({i}/{total_row_number})", end="\r")
    print(f"Gathering messages...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def media(db, data, media_folder):
    """Attach media information to the messages already present in data.

    For every message_media row the matching message is flagged as media.
    When media_folder/file_path exists its path becomes the message data and
    the MIME type is taken from the row or guessed from the file name
    ("data/data" if no guess is possible); otherwise the message is turned
    into a "The media is missing" meta entry.
    """
    # Get media
    c = db.cursor()
    c.execute("""SELECT count() FROM message_media""")
    total_row_number = c.fetchone()[0]
    print(f"\nGathering media...(0/{total_row_number})", end="\r")
    c.execute("""SELECT messages.key_remote_jid,
                        message_row_id,
                        file_path,
                        message_url,
                        mime_type,
                        media_key
                 FROM message_media
                    INNER JOIN messages
                        ON message_media.message_row_id = messages._id
                ORDER BY messages.key_remote_jid ASC""")
    mime = MimeTypes()
    for done, content in enumerate(c, start=1):
        entry = data[content[0]]["messages"][content[1]]
        file_path = f"{media_folder}/{content[2]}"
        entry["media"] = True
        if not os.path.isfile(file_path):
            entry["data"] = "The media is missing"
            entry["mime"] = "media"
            entry["meta"] = True
        else:
            entry["data"] = file_path
            if content[4] is not None:
                entry["mime"] = content[4]
            else:
                guess = mime.guess_type(file_path)[0]
                entry["mime"] = guess if guess is not None else "data/data"
        if done % 100 == 0:
            print(f"Gathering media...({done}/{total_row_number})", end="\r")
    print(
        f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def vcard(db, data):
    """Write shared vCards to WhatsApp/vCards and point their messages at the files.

    The file name is the message's media_name reduced to its alphanumeric
    characters plus ".vcf"; files that already exist are left untouched. The
    message data becomes a notice naming the file, with mime "text/x-vcard"
    and meta set to True.
    """
    c = db.cursor()
    c.execute("""SELECT message_row_id,
                        messages.key_remote_jid,
                        vcard,
                        messages.media_name
                 FROM messages_vcards
                    INNER JOIN messages
                        ON messages_vcards.message_row_id = messages._id
                 ORDER BY messages.key_remote_jid ASC;""")
    rows = c.fetchall()
    total_row_number = len(rows)
    print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
    base = "WhatsApp/vCards"
    os.makedirs(base, exist_ok=True)
    for done, (row_id, jid, card, media_name) in enumerate(rows, start=1):
        file_name = "".join(filter(str.isalnum, media_name))
        file_path = f"{base}/{file_name}.vcf"
        if not os.path.isfile(file_path):
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(card)
        entry = data[jid]["messages"][row_id]
        entry["data"] = media_name + \
            "The vCard file cannot be displayed here, " \
            f"however it should be located at {file_path}"
        entry["mime"] = "text/x-vcard"
        entry["meta"] = True
        print(f"Gathering vCards...({done}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def create_html(data, output_folder, template=None):
    """Render one HTML file per chat that contains at least one message.

    Args:
        data: mapping of jid -> {"name": ..., "messages": {...}}.
        output_folder: directory receiving the files; created when missing.
        template: optional path of a Jinja2 template; defaults to
            "whatsapp.html" in this module's directory.
    """
    if template is None:
        template_dir, template_file = os.path.dirname(__file__), "whatsapp.html"
    else:
        template_dir, template_file = os.path.split(template)
    templateEnv = jinja2.Environment(
        loader=jinja2.FileSystemLoader(searchpath=template_dir))
    templateEnv.globals.update(determine_day=determine_day)
    templateEnv.filters['sanitize_except'] = sanitize_except
    template = templateEnv.get_template(template_file)

    total_row_number = len(data)
    print(f"\nCreating HTML...(0/{total_row_number})", end="\r")

    if not os.path.isdir(output_folder):
        os.mkdir(output_folder)

    for current, contact in enumerate(data):
        chat = data[contact]
        if len(chat["messages"]) == 0:
            continue
        phone_number = contact.split('@')[0]
        # jids containing "-" are treated as groups and get no number prefix
        file_name = "" if "-" in contact else phone_number

        if chat["name"] is None:
            name = phone_number
        else:
            name = chat["name"]
            if file_name != "":
                file_name += "-"
            file_name += name.replace("/", "-")

        # keep only characters that are safe in a file name
        safe_file_name = "".join(x for x in file_name if x.isalnum() or x in "- ")
        rendered = template.render(
            name=name,
            msgs=chat["messages"].values(),
            my_avatar=None,
            their_avatar=f"WhatsApp/Avatars/{contact}.j"
        )
        with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
            f.write(rendered)
        if current % 10 == 0:
            print(f"Creating HTML...({current}/{total_row_number})", end="\r")

    print(f"Creating HTML...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point:
    #   [options] [message database [output folder]]
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option(
        "-w", "--wa", dest="wa", default="wa.db",
        help="Path to contact database")
    parser.add_option(
        "-m", "--media", dest="media", default="WhatsApp",
        help="Path to WhatsApp media folder")
    (options, args) = parser.parse_args()
    contact_db = options.wa
    media_folder = options.media

    # positional arguments override the defaults only when exactly one or
    # two of them are given
    msg_db = "msgstore.db"
    output_folder = "temp"
    if len(args) in (1, 2):
        msg_db = args[0]
    if len(args) == 2:
        output_folder = args[1]

    data = {}

    if os.path.isfile(contact_db):
        with sqlite3.connect(contact_db) as db:
            contacts(db, data)
    if os.path.isfile(msg_db):
        with sqlite3.connect(msg_db) as db:
            messages(db, data)
            media(db, data, media_folder)
            vcard(db, data)
        create_html(data, output_folder)

    # move the media folder next to the HTML unless it is already there
    if not os.path.isdir(f"{output_folder}/WhatsApp"):
        shutil.move(media_folder, f"{output_folder}/")

    with open("result.json", "w") as f:
        data = json.dumps(data)
        print(f"\nWriting JSON file...({int(len(data)/1024/1024)}MB)")
        f.write(data)

    print("Everything is done!")
|
||||
@@ -1,337 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import sqlite3
|
||||
import json
|
||||
import jinja2
|
||||
import os
|
||||
import requests
|
||||
import shutil
|
||||
import pkgutil
|
||||
from pathlib import Path
|
||||
from bleach import clean as sanitize
|
||||
from markupsafe import Markup
|
||||
from datetime import datetime
|
||||
from mimetypes import MimeTypes
|
||||
|
||||
APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1))
|
||||
|
||||
|
||||
def sanitize_except(html):
    """Run html through bleach with <br> as the only allowed tag and return it wrapped in Markup."""
    allowed_tags = ["br"]
    safe_html = sanitize(html, tags=allowed_tags)
    return Markup(safe_html)
|
||||
|
||||
|
||||
def determine_day(last, current):
    """Return current's local calendar date when the day changed since last, otherwise None.

    Both arguments are POSIX timestamps converted with datetime.fromtimestamp.
    """
    day_before = datetime.fromtimestamp(last).date()
    day_now = datetime.fromtimestamp(current).date()
    if day_before != day_now:
        return day_now
    return None
|
||||
|
||||
|
||||
def messages(db, data):
    """Load chat sessions and the message history from the database into data.

    First every ZWACHATSESSION row becomes data[jid] = {"name": partner name,
    "messages": {}}. Then every ZWAMESSAGE row is stored at
    data[jid]["messages"][Z_PK] with the keys from_me, timestamp, time,
    media, reply, caption, meta, data and sender. ZMESSAGEDATE is added to
    APPLE_TIME to obtain the timestamp. Type-6 rows of a group whose text is
    purely numeric are removed again.
    """
    c = db.cursor()
    # Get contacts
    c.execute("""SELECT count() FROM ZWACHATSESSION""")
    total_row_number = c.fetchone()[0]
    print(f"Gathering contacts...({total_row_number})")

    c.execute("""SELECT ZCONTACTJID, ZPARTNERNAME FROM ZWACHATSESSION; """)
    for contact_jid, partner_name in c:
        data[contact_jid] = {"name": partner_name, "messages": {}}

    # Get message history
    c.execute("""SELECT count() FROM ZWAMESSAGE""")
    total_row_number = c.fetchone()[0]
    print(f"Gathering messages...(0/{total_row_number})", end="\r")

    c.execute("""SELECT COALESCE(ZFROMJID, ZTOJID),
                        ZWAMESSAGE.Z_PK,
                        ZISFROMME,
                        ZMESSAGEDATE,
                        ZTEXT,
                        ZMESSAGETYPE,
                        ZWAGROUPMEMBER.ZMEMBERJID
                 FROM main.ZWAMESSAGE
                    LEFT JOIN main.ZWAGROUPMEMBER
                        ON main.ZWAMESSAGE.ZGROUPMEMBER = main.ZWAGROUPMEMBER.Z_PK;""")
    for i, content in enumerate(c, start=1):
        jid, row_id, from_me, date, text, msg_type, member_jid = content
        if jid not in data:
            data[jid] = {"name": None, "messages": {}}
        chat = data[jid]["messages"]
        ts = APPLE_TIME + date
        entry = {
            "from_me": bool(from_me),
            "timestamp": ts,
            "time": datetime.fromtimestamp(ts).strftime("%H:%M"),
            "media": False,
            "reply": None,
            "caption": None,
            "meta": False,
            "data": None
        }
        chat[row_id] = entry

        is_group = "-" in jid
        name = fallback = None
        if is_group and from_me == 0 and member_jid is not None:
            # incoming group message: contact name first, number as fallback
            if member_jid in data:
                name = data[member_jid]["name"]
            if "@" in member_jid:
                fallback = member_jid.split('@')[0]
        entry["sender"] = name or fallback if is_group and from_me == 0 else None

        if msg_type == 6:
            # Metadata: only a group row with text needs handling, in every
            # other case "data" simply stays None
            if is_group and text is not None:
                # Changed name
                try:
                    int(text)
                except ValueError:
                    entry["data"] = f"The group name changed to {text}"
                    entry["meta"] = True
                else:
                    del chat[row_id]
        else:
            # real message; sent and received rows are handled identically
            if msg_type == 14:
                msg = "Message deleted"
                entry["meta"] = True
            else:
                msg = text
                if msg is not None:
                    if "\r\n" in msg:
                        msg = msg.replace("\r\n", "<br>")
                    if "\n" in msg:
                        msg = msg.replace("\n", "<br>")
            entry["data"] = msg

        if i % 1000 == 0:
            print(f"Gathering messages...({i}/{total_row_number})", end="\r")
    print(
        f"Gathering messages...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def media(db, data, media_folder):
    """Attach media information to messages already stored in ``data``.

    Every ``ZWAMEDIAITEM`` row that has a local path is matched to its
    message entry ``data[jid]["messages"][message_id]``.  The entry is
    flagged as media and receives either the path of the file found under
    ``media_folder`` (plus a MIME type) or a "missing" placeholder.

    Args:
        db: Open connection to the WhatsApp message database.
        data: Nested dict of chats, mutated in place.
        media_folder: Directory the stored relative media paths are
            resolved against.
    """
    cursor = db.cursor()
    cursor.execute("""SELECT count() FROM ZWAMEDIAITEM""")
    total_row_number = cursor.fetchone()[0]
    print(f"\nGathering media...(0/{total_row_number})", end="\r")
    cursor.execute("""SELECT COALESCE(ZWAMESSAGE.ZFROMJID, ZWAMESSAGE.ZTOJID) as _id,
                        ZMESSAGE,
                        ZMEDIALOCALPATH,
                        ZMEDIAURL,
                        ZVCARDSTRING,
                        ZMEDIAKEY,
                        ZTITLE
                 FROM ZWAMEDIAITEM
                    INNER JOIN ZWAMESSAGE
                        ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK
                 WHERE ZMEDIALOCALPATH IS NOT NULL
                 ORDER BY _id ASC""")
    mime_types = MimeTypes()
    processed = 0
    # fetchone() returns None once the result set is exhausted.
    for row in iter(cursor.fetchone, None):
        file_path = f"{media_folder}/{row[2]}"
        entry = data[row[0]]["messages"][row[1]]
        entry["media"] = True

        if not os.path.isfile(file_path):
            entry["data"] = "The media is missing"
            entry["mime"] = "media"
            entry["meta"] = True
        else:
            entry["data"] = file_path
            if row[4] is not None:
                # The stored ZVCARDSTRING value is used as the MIME type.
                entry["mime"] = row[4]
            else:
                guessed = mime_types.guess_type(file_path)[0]
                entry["mime"] = "data/data" if guessed is None else guessed

        if row[6] is not None:
            entry["caption"] = row[6]

        processed += 1
        if processed % 100 == 0:
            print(f"Gathering media...({processed}/{total_row_number})", end="\r")
    print(
        f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def vcard(db, data):
    """Export shared vCards to ``Message/vCards`` and annotate their messages.

    Each vCard string is written to ``<alphanumeric name>.vcf`` (name capped
    at 200 characters) unless a file with that name already exists.  The
    matching entry in ``data`` is then rewritten to point at the file.

    Args:
        db: Open connection to the WhatsApp message database.
        data: Nested dict of chats, mutated in place.
    """
    cursor = db.cursor()
    cursor.execute("""SELECT DISTINCT ZWAVCARDMENTION.ZMEDIAITEM,
                        ZWAMEDIAITEM.ZMESSAGE,
                        COALESCE(ZWAMESSAGE.ZFROMJID,
                                 ZWAMESSAGE.ZTOJID) as _id,
                        ZVCARDNAME,
                        ZVCARDSTRING
                 FROM ZWAVCARDMENTION
                    INNER JOIN ZWAMEDIAITEM
                        ON ZWAVCARDMENTION.ZMEDIAITEM = ZWAMEDIAITEM.Z_PK
                    INNER JOIN ZWAMESSAGE
                        ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK""")
    rows = cursor.fetchall()
    total_row_number = len(rows)
    print(f"\nGathering vCards...(0/{total_row_number})", end="\r")
    base = "Message/vCards"
    if not os.path.isdir(base):
        Path(base).mkdir(parents=True, exist_ok=True)
    for done, row in enumerate(rows, start=1):
        card_name = row[3]
        # Only alphanumeric characters are kept in the file name.
        stem = "".join(filter(str.isalnum, card_name))
        file_path = f"{base}/{stem[:200]}.vcf"
        if not os.path.isfile(file_path):
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(row[4])
        entry = data[row[2]]["messages"][row[1]]
        entry["data"] = card_name + \
            "The vCard file cannot be displayed here, " \
            f"however it should be located at {file_path}"
        entry["mime"] = "text/x-vcard"
        entry["media"] = True
        entry["meta"] = True
        print(f"Gathering vCards...({done}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def create_html(data, output_folder, template=None):
    """Render one HTML file per non-empty chat into ``output_folder``.

    Args:
        data: Nested dict of chats; each value carries ``"name"`` and
            ``"messages"``.
        output_folder: Destination directory, created when missing.
        template: Optional path to a Jinja2 template.  When omitted,
            ``whatsapp.html`` next to this module is used.
    """
    if template is None:
        template_dir, template_file = os.path.dirname(__file__), "whatsapp.html"
    else:
        template_dir, template_file = os.path.split(template)

    environment = jinja2.Environment(
        loader=jinja2.FileSystemLoader(searchpath=template_dir)
    )
    environment.globals.update(determine_day=determine_day)
    environment.filters['sanitize_except'] = sanitize_except
    page = environment.get_template(template_file)

    total_row_number = len(data)
    print(f"\nCreating HTML...(0/{total_row_number})", end="\r")

    if not os.path.isdir(output_folder):
        os.mkdir(output_folder)

    for current, contact in enumerate(data):
        chat = data[contact]
        if len(chat["messages"]) == 0:
            continue
        phone_number = contact.split('@')[0]
        # JIDs containing "-" get no phone-number prefix in the file name.
        file_name = "" if "-" in contact else phone_number

        chat_name = chat["name"]
        if chat_name is None:
            name = phone_number
        else:
            if file_name != "":
                file_name += "-"
            file_name += chat_name.replace("/", "-")
            name = chat_name

        safe_file_name = "".join(
            ch for ch in file_name if ch.isalnum() or ch in "- "
        )
        rendered = page.render(
            name=name,
            msgs=chat["messages"].values(),
            my_avatar=None,
            their_avatar=f"WhatsApp/Avatars/{contact}.j"
        )
        with open(f"{output_folder}/{safe_file_name}.html", "w", encoding="utf-8") as f:
            f.write(rendered)
        if current % 10 == 0:
            print(f"Creating HTML...({current}/{total_row_number})", end="\r")

    print(f"Creating HTML...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: read the message database, render the HTML
    # chats, move the media folder into the output and dump everything to
    # result.json.
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option(
        "-w", "--wa", dest="wa", default="wa.db",
        help="Path to contact database")
    parser.add_option(
        "-m", "--media", dest="media", default="Message",
        help="Path to WhatsApp media folder"
    )
    (options, args) = parser.parse_args()

    # Defaults, optionally overridden by positional arguments:
    #   [message database [output folder]]
    msg_db = "7c7fba66680ef796b916b067077cc246adacf01d"
    output_folder = "temp"
    contact_db = options.wa
    media_folder = options.media

    if len(args) in (1, 2):
        msg_db = args[0]
    if len(args) == 2:
        output_folder = args[1]

    data = {}

    # NOTE(review): nesting reconstructed from a flattened listing; confirm
    # that create_html() belongs to the "database exists" branch.
    if os.path.isfile(msg_db):
        with sqlite3.connect(msg_db) as db:
            messages(db, data)
            media(db, data, media_folder)
            vcard(db, data)
        create_html(data, output_folder)

    if not os.path.isdir(f"{output_folder}/WhatsApp"):
        shutil.move(media_folder, f"{output_folder}/")

    with open("result.json", "w") as f:
        data = json.dumps(data)
        print(f"\nWriting JSON file...({int(len(data)/1024/1024)}MB)")
        f.write(data)

    print("Everything is done!")
|
||||
@@ -1,133 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import shutil
|
||||
import sqlite3
|
||||
import os
|
||||
import getpass
|
||||
try:
|
||||
from iphone_backup_decrypt import EncryptedBackup, RelativePath
|
||||
except ModuleNotFoundError:
|
||||
support_encrypted = False
|
||||
else:
|
||||
support_encrypted = True
|
||||
|
||||
|
||||
def extract_encrypted(base_dir, password):
    """Decrypt the WhatsApp databases and media out of an encrypted backup.

    The message and contact databases are written to the current working
    directory.  Every ``Message/Media/%`` entry of the backup manifest is
    then recreated under the same relative path.

    Args:
        base_dir: Directory of the encrypted iOS backup.
        password: Passphrase of the backup.
    """
    backup = EncryptedBackup(backup_directory=base_dir, passphrase=password)
    print("Decrypting WhatsApp database...")
    backup.extract_file(relative_path=RelativePath.WHATSAPP_MESSAGES,
                        output_filename="7c7fba66680ef796b916b067077cc246adacf01d")
    backup.extract_file(relative_path=RelativePath.WHATSAPP_CONTACTS,
                        output_filename="ContactsV2.sqlite")
    counted = backup.execute_sql("""SELECT count()
                                    FROM Files
                                    WHERE relativePath
                                        LIKE 'Message/Media/%'"""
                                 )
    total_row_number = counted[0][0]
    print(f"Gathering media...(0/{total_row_number})", end="\r")
    entries = backup.execute_sql("""SELECT fileID,
                                           relativePath,
                                           flags,
                                           file
                                    FROM Files
                                    WHERE relativePath
                                        LIKE 'Message/Media/%'"""
                                 )
    for required in ("Message", "Message/Media"):
        if not os.path.isdir(required):
            os.mkdir(required)
    for done, entry in enumerate(entries, start=1):
        file_id, destination, flags, bplist = entry[0], entry[1], entry[2], entry[3]
        if flags == 2:
            # Flag 2 rows are created as directories; existing ones are kept.
            try:
                os.mkdir(destination)
            except FileExistsError:
                pass
        elif flags == 1:
            # Flag 1 rows are decrypted and written out as files.
            decrypted = backup.decrypt_inner_file(file_id=file_id, file_bplist=bplist)
            with open(destination, "wb") as f:
                f.write(decrypted)
        if done % 100 == 0:
            print(f"Gathering media...({done}/{total_row_number})", end="\r")
    print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def is_encrypted(base_dir):
    """Tell whether the iOS backup in ``base_dir`` is encrypted.

    The check queries ``Manifest.db``: when SQLite cannot read the file as a
    database, the backup is treated as encrypted.

    Args:
        base_dir: Directory of the iOS backup.

    Returns:
        ``True`` when ``Manifest.db`` is not a readable SQLite database,
        ``False`` when the ``Files`` table can be queried.

    Raises:
        sqlite3.OperationalError: For failures that say nothing about
            encryption (missing ``Files`` table, locked or unopenable file).
    """
    from contextlib import closing

    # sqlite3's own context manager only commits or rolls back; closing()
    # makes sure the file handle is released as well.
    with closing(sqlite3.connect(f"{base_dir}/Manifest.db")) as manifest:
        try:
            manifest.execute("""SELECT count()
                                FROM Files
                             """)
        except sqlite3.OperationalError:
            # Subclass of DatabaseError: must be filtered out first so it is
            # not mistaken for an encrypted manifest.
            raise
        except sqlite3.DatabaseError:
            return True
        else:
            return False
|
||||
|
||||
|
||||
def extract_media(base_dir):
    """Copy the WhatsApp database and media out of an iOS backup.

    Encrypted backups are delegated to ``extract_encrypted`` after asking
    for the password.  For plain backups the message database and every
    ``Message/Media/%`` manifest entry are copied into the current working
    directory.

    Args:
        base_dir: Directory of the iOS backup.

    Returns:
        ``False`` when the backup is encrypted but the decryption dependency
        is missing; ``None`` otherwise.  The process exits when the WhatsApp
        database is not part of a plain backup.
    """
    from contextlib import closing

    if is_encrypted(base_dir):
        if not support_encrypted:
            print("You don't have the dependencies to handle encrypted backup.")
            print("Read more on how to deal with encrypted backup:")
            print("https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage")
            return False
        password = getpass.getpass("Enter the password:")
        extract_encrypted(base_dir, password)
    else:
        wts_db = os.path.join(base_dir, "7c/7c7fba66680ef796b916b067077cc246adacf01d")
        if not os.path.isfile(wts_db):
            print("WhatsApp database not found.")
            exit()
        else:
            shutil.copyfile(wts_db, "7c7fba66680ef796b916b067077cc246adacf01d")
        # closing() releases the manifest handle; sqlite3's own context
        # manager would only commit.
        with closing(sqlite3.connect(f"{base_dir}/Manifest.db")) as manifest:
            c = manifest.cursor()
            c.execute("""SELECT count()
                         FROM Files
                         WHERE relativePath
                            LIKE 'Message/Media/%'""")
            total_row_number = c.fetchone()[0]
            print(f"Gathering media...(0/{total_row_number})", end="\r")
            c.execute("""SELECT fileID,
                                relativePath,
                                flags
                         FROM Files
                         WHERE relativePath
                            LIKE 'Message/Media/%'""")
            os.makedirs("Message/Media", exist_ok=True)
            i = 0
            for hashes, destination, flags in iter(c.fetchone, None):
                if flags == 2:
                    # Tolerate directories left over from a previous run,
                    # like the encrypted extraction path does.
                    os.makedirs(destination, exist_ok=True)
                elif flags == 1:
                    # The source file sits in a folder named after the first
                    # two characters of its fileID.
                    shutil.copyfile(f"{base_dir}/{hashes[:2]}/{hashes}", destination)
                i += 1
                if i % 100 == 0:
                    print(f"Gathering media...({i}/{total_row_number})", end="\r")
            print(f"Gathering media...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Stand-alone usage: extract the WhatsApp files from the backup
    # directory given as the first positional argument.
    from optparse import OptionParser
    parser = OptionParser(usage="usage: %prog BACKUP_DIRECTORY")
    (_, args) = parser.parse_args()
    if not args:
        # Report a usage error instead of failing with an IndexError.
        parser.error("the path to the iOS backup directory is required")
    base_dir = args[0]
    extract_media(base_dir)
|
||||
363
Whatsapp_Chat_Exporter/ios_handler.py
Normal file
363
Whatsapp_Chat_Exporter/ios_handler.py
Normal file
@@ -0,0 +1,363 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import os
|
||||
import shutil
|
||||
from glob import glob
|
||||
from pathlib import Path
|
||||
from mimetypes import MimeTypes
|
||||
from markupsafe import escape as htmle
|
||||
from Whatsapp_Chat_Exporter.data_model import ChatStore, Message
|
||||
from Whatsapp_Chat_Exporter.utility import APPLE_TIME, Device, get_chat_condition, slugify
|
||||
|
||||
|
||||
def contacts(db, data):
    """Pre-populate ``data`` with the status text of address-book contacts.

    Only contacts whose ``ZABOUTTEXT`` is set are read.  Each one gets a new
    ``ChatStore`` keyed by its JID, with ``status`` set to that text.

    Args:
        db: Open connection to the contact database; rows must support
            access by column name.
        data: Mapping of JID to ``ChatStore``, mutated in place.
    """
    c = db.cursor()
    # Get status only lol
    c.execute("""SELECT count() FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""")
    total_row_number = c.fetchone()[0]
    print(f"Pre-processing contacts...({total_row_number})")
    c.execute("""SELECT ZWHATSAPPID, ZABOUTTEXT FROM ZWAADDRESSBOOKCONTACT WHERE ZABOUTTEXT IS NOT NULL""")
    for content in iter(c.fetchone, None):
        jid = content["ZWHATSAPPID"]
        if jid is None:
            # Nothing to key the chat on.
            continue
        # Always start from this row's own ID.  Binding the key only when
        # the suffix was missing left it undefined, or stale from the
        # previous row, for IDs that already are full JIDs.
        if not jid.endswith("@s.whatsapp.net"):
            jid += "@s.whatsapp.net"
        data[jid] = ChatStore(Device.IOS)
        data[jid].status = content["ZABOUTTEXT"]
|
||||
|
||||
|
||||
def _assign_avatars(chat, media_folder, jid, only_if_unset):
    """Point ``chat`` at the profile pictures found on disk for ``jid``."""
    path = f'{media_folder}/Media/Profile/{jid.split("@")[0]}'
    avatars = glob(f"{path}*")
    if len(avatars) == 1:
        chat.their_avatar = avatars[0]
        return
    for avatar in avatars:
        if avatar.endswith(".thumb"):
            if not only_if_unset or chat.their_avatar_thumb is None:
                chat.their_avatar_thumb = avatar
        elif avatar.endswith(".jpg"):
            if not only_if_unset or chat.their_avatar is None:
                chat.their_avatar = avatar


def _html_line_breaks(text):
    """Replace CRLF/LF newlines in ``text`` with ``<br>``; ``None`` passes through."""
    if text is None:
        return None
    return text.replace("\r\n", "<br>").replace("\n", "<br>")


def messages(db, data, media_folder, timezone_offset, filter_date, filter_chat):
    """Load chats and their messages from the iOS message database.

    First every chat session that owns at least one message is registered
    in ``data`` with a display name and avatar paths.  Then all messages are
    read in chronological order and added to their chat.

    Args:
        db: Open connection to the message database; rows must support
            access by column name.
        data: Mapping of JID to ``ChatStore``, mutated in place.
        media_folder: Root folder of the extracted WhatsApp files.
        timezone_offset: Passed through to every ``Message``.
        filter_date: SQL comparison fragment appended after
            ``ZMESSAGEDATE``, or ``None`` for no date filter.
        filter_chat: Pair of values handed to ``get_chat_condition`` with
            the flags ``True`` (index 0) and ``False`` (index 1).
    """
    c = db.cursor()
    # NOTE(review): filter_date and the chat conditions are interpolated
    # into the SQL text verbatim; they must never carry untrusted input.
    include_filter = get_chat_condition(
        filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
    exclude_filter = get_chat_condition(
        filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")
    date_filter = f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''

    # Get contacts
    contact_query = f"""SELECT DISTINCT ZCONTACTJID,
                               ZPARTNERNAME,
                               ZWAPROFILEPUSHNAME.ZPUSHNAME
                        FROM ZWACHATSESSION
                            INNER JOIN ZWAMESSAGE
                                ON ZWAMESSAGE.ZCHATSESSION = ZWACHATSESSION.Z_PK
                            LEFT JOIN ZWAPROFILEPUSHNAME
                                ON ZWACHATSESSION.ZCONTACTJID = ZWAPROFILEPUSHNAME.ZJID
                            LEFT JOIN ZWAGROUPMEMBER
                                ON ZWAMESSAGE.ZGROUPMEMBER = ZWAGROUPMEMBER.Z_PK
                        WHERE 1=1
                            {include_filter}
                            {exclude_filter}
                        GROUP BY ZCONTACTJID"""
    c.execute(f"SELECT count() FROM ({contact_query});")
    total_row_number = c.fetchone()[0]
    print(f"Processing contacts...({total_row_number})")

    c.execute(f"{contact_query};")
    for content in iter(c.fetchone, None):
        partner_name = content["ZPARTNERNAME"]
        push_name = content["ZPUSHNAME"]
        # A missing partner name is treated like a bare phone number, so the
        # push name is preferred instead of failing on None.
        is_phone = partner_name is None or \
            partner_name.replace("+", "").replace(" ", "").isdigit()
        if push_name is None or (push_name and not is_phone):
            contact_name = partner_name
        else:
            contact_name = push_name
        contact_id = content["ZCONTACTJID"]
        if contact_id not in data:
            data[contact_id] = ChatStore(Device.IOS, contact_name, media_folder)
        else:
            data[contact_id].name = contact_name
            # NOTE(review): assumed to apply only to entries that already
            # existed (nesting reconstructed from a flattened listing).
            data[contact_id].my_avatar = os.path.join(media_folder, "Media/Profile/Photo.jpg")
        _assign_avatars(data[contact_id], media_folder, contact_id, only_if_unset=True)

    # Get message history
    c.execute(f"""SELECT count()
                  FROM ZWAMESSAGE
                        INNER JOIN ZWACHATSESSION
                            ON ZWAMESSAGE.ZCHATSESSION = ZWACHATSESSION.Z_PK
                        LEFT JOIN ZWAGROUPMEMBER
                            ON ZWAMESSAGE.ZGROUPMEMBER = ZWAGROUPMEMBER.Z_PK
                  WHERE 1=1
                    {date_filter}
                    {include_filter}
                    {exclude_filter}""")
    total_row_number = c.fetchone()[0]
    print(f"Processing messages...(0/{total_row_number})", end="\r")
    c.execute(f"""SELECT ZCONTACTJID,
                        ZWAMESSAGE.Z_PK,
                        ZISFROMME,
                        ZMESSAGEDATE,
                        ZTEXT,
                        ZMESSAGETYPE,
                        ZWAGROUPMEMBER.ZMEMBERJID,
                        ZMETADATA,
                        ZSTANZAID,
                        ZGROUPINFO
                 FROM ZWAMESSAGE
                    LEFT JOIN ZWAGROUPMEMBER
                        ON ZWAMESSAGE.ZGROUPMEMBER = ZWAGROUPMEMBER.Z_PK
                    LEFT JOIN ZWAMEDIAITEM
                        ON ZWAMESSAGE.Z_PK = ZWAMEDIAITEM.ZMESSAGE
                    INNER JOIN ZWACHATSESSION
                        ON ZWAMESSAGE.ZCHATSESSION = ZWACHATSESSION.Z_PK
                 WHERE 1=1
                    {date_filter}
                    {include_filter}
                    {exclude_filter}
                 ORDER BY ZMESSAGEDATE ASC;""")
    i = 0
    for content in iter(c.fetchone, None):
        ZCONTACTJID = content["ZCONTACTJID"]
        Z_PK = content["Z_PK"]
        is_group_message = content["ZGROUPINFO"] is not None
        if ZCONTACTJID not in data:
            data[ZCONTACTJID] = ChatStore(Device.IOS)
            _assign_avatars(data[ZCONTACTJID], media_folder, ZCONTACTJID, only_if_unset=False)
        ts = APPLE_TIME + content["ZMESSAGEDATE"]
        message = Message(
            from_me=content["ZISFROMME"],
            timestamp=ts,
            time=ts,  # TODO: Could be bug
            key_id=content["ZSTANZAID"][:17],
            timezone_offset=timezone_offset
        )
        invalid = False

        # Sender: only resolved for incoming group messages.
        member_jid = content["ZMEMBERJID"]
        if is_group_message and content["ZISFROMME"] == 0:
            name = None
            fallback = None
            if member_jid is not None:
                if member_jid in data:
                    name = data[member_jid].name
                if "@" in member_jid:
                    fallback = member_jid.split('@')[0]
            message.sender = name or fallback
        else:
            message.sender = None

        if content["ZMESSAGETYPE"] == 6:
            # Metadata
            message.data = None
            if is_group_message and content["ZTEXT"] is not None:
                # Changed name: a purely numeric text is not a group name,
                # such rows are dropped.
                try:
                    int(content["ZTEXT"])
                except ValueError:
                    message.data = f"The group name changed to {content['ZTEXT']}"
                    message.meta = True
                else:
                    invalid = True
        else:
            # real message
            if content["ZMETADATA"] is not None and content["ZMETADATA"].startswith(b"\x2a\x14"):
                quoted = content["ZMETADATA"][2:19]
                message.reply = quoted.decode()
                message.quoted_data = None  # TODO
            if content["ZMESSAGETYPE"] == 15:  # Sticker
                message.sticker = True

            # Incoming and outgoing messages are formatted the same way.
            if content["ZMESSAGETYPE"] == 14:
                msg = "Message deleted"
                message.meta = True
            else:
                msg = _html_line_breaks(content["ZTEXT"])
            message.data = msg
        if not invalid:
            data[ZCONTACTJID].add_message(Z_PK, message)
        i += 1
        if i % 1000 == 0:
            print(f"Processing messages...({i}/{total_row_number})", end="\r")
    print(
        f"Processing messages...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def media(db, data, media_folder, filter_date, filter_chat, separate_media=False):
    """Attach media files to the messages already loaded into ``data``.

    Args:
        db: Open connection to the message database; rows must support
            access by column name.
        data: Mapping of JID to ``ChatStore`` whose messages were loaded
            beforehand; mutated in place.
        media_folder: Root folder of the extracted WhatsApp files.
        filter_date: SQL comparison fragment for the message date, or
            ``None``.
        filter_chat: Pair of values handed to ``get_chat_condition``.
        separate_media: When true, every file found is also copied to
            ``<media_folder>/separated/<chat name>/`` and the message points
            at that copy.
    """
    c = db.cursor()
    # Get media
    c.execute(f"""SELECT count()
                  FROM ZWAMEDIAITEM
                        INNER JOIN ZWAMESSAGE
                            ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK
                        INNER JOIN ZWACHATSESSION
                            ON ZWAMESSAGE.ZCHATSESSION = ZWACHATSESSION.Z_PK
                        LEFT JOIN ZWAGROUPMEMBER
                            ON ZWAMESSAGE.ZGROUPMEMBER = ZWAGROUPMEMBER.Z_PK
                  WHERE 1=1
                    {f'AND ZMESSAGEDATE {filter_date}' if filter_date is not None else ''}
                    {get_chat_condition(filter_chat[0], True, ["ZWACHATSESSION.ZCONTACTJID","ZMEMBERJID"], "ZGROUPINFO", "ios")}
                    {get_chat_condition(filter_chat[1], False, ["ZWACHATSESSION.ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")}
                """)
    total_row_number = c.fetchone()[0]
    print(f"\nProcessing media...(0/{total_row_number})", end="\r")
    c.execute(f"""SELECT ZCONTACTJID,
                        ZMESSAGE,
                        ZMEDIALOCALPATH,
                        ZMEDIAURL,
                        ZVCARDSTRING,
                        ZMEDIAKEY,
                        ZTITLE
                 FROM ZWAMEDIAITEM
                    INNER JOIN ZWAMESSAGE
                        ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK
                    INNER JOIN ZWACHATSESSION
                        ON ZWAMESSAGE.ZCHATSESSION = ZWACHATSESSION.Z_PK
                    LEFT JOIN ZWAGROUPMEMBER
                        ON ZWAMESSAGE.ZGROUPMEMBER = ZWAGROUPMEMBER.Z_PK
                 WHERE ZMEDIALOCALPATH IS NOT NULL
                    {f'AND ZWAMESSAGE.ZMESSAGEDATE {filter_date}' if filter_date is not None else ''}
                    {get_chat_condition(filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")}
                    {get_chat_condition(filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")}
                 ORDER BY ZCONTACTJID ASC""")
    mime = MimeTypes()
    for done, row in enumerate(iter(c.fetchone, None), start=1):
        file_path = f"{media_folder}/Message/{row['ZMEDIALOCALPATH']}"
        chat = data[row["ZCONTACTJID"]]
        message = chat.messages[row["ZMESSAGE"]]
        message.media = True
        if chat.media_base == "":
            chat.media_base = media_folder + "/"

        if not os.path.isfile(file_path):
            message.data = "The media is missing"
            message.mime = "media"
            message.meta = True
        else:
            # Stored path drops the first component of file_path.
            message.data = '/'.join(file_path.split("/")[1:])
            declared = row["ZVCARDSTRING"]
            if declared is not None:
                # The stored ZVCARDSTRING value is used as the MIME type.
                message.mime = declared
            else:
                guess = mime.guess_type(file_path)[0]
                message.mime = "application/octet-stream" if guess is None else guess
            if separate_media:
                chat_display_name = slugify(chat.name or message.sender \
                    or row["ZCONTACTJID"].split('@')[0], True)
                new_folder = os.path.join(media_folder, "separated", chat_display_name)
                Path(new_folder).mkdir(parents=True, exist_ok=True)
                new_path = os.path.join(new_folder, file_path.split("/")[-1])
                shutil.copy2(file_path, new_path)
                message.data = new_path

        if row["ZTITLE"] is not None:
            message.caption = row["ZTITLE"]
        if done % 100 == 0:
            print(f"Processing media...({done}/{total_row_number})", end="\r")
    print(
        f"Processing media...({total_row_number}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def vcard(db, data, media_folder, filter_date, filter_chat):
    """Export shared vCards to disk and link them from their messages.

    The cards of each message are written to
    ``<media_folder>/Message/vCards/<alphanumeric name>.vcf`` (existing
    files are kept).  The message body becomes an HTML list of links to
    those files.

    Args:
        db: Open connection to the message database; rows must support
            access by column name.
        data: Mapping of JID to ``ChatStore`` whose messages were loaded
            beforehand; mutated in place.
        media_folder: Root folder of the extracted WhatsApp files.
        filter_date: SQL comparison fragment for the message date, or
            ``None``.
        filter_chat: Pair of values handed to ``get_chat_condition``.
    """
    c = db.cursor()
    c.execute(f"""SELECT DISTINCT ZWAVCARDMENTION.ZMEDIAITEM,
                        ZWAMEDIAITEM.ZMESSAGE,
                        ZCONTACTJID,
                        ZVCARDNAME,
                        ZVCARDSTRING
                 FROM ZWAVCARDMENTION
                    INNER JOIN ZWAMEDIAITEM
                        ON ZWAVCARDMENTION.ZMEDIAITEM = ZWAMEDIAITEM.Z_PK
                    INNER JOIN ZWAMESSAGE
                        ON ZWAMEDIAITEM.ZMESSAGE = ZWAMESSAGE.Z_PK
                    INNER JOIN ZWACHATSESSION
                        ON ZWAMESSAGE.ZCHATSESSION = ZWACHATSESSION.Z_PK
                    LEFT JOIN ZWAGROUPMEMBER
                        ON ZWAMESSAGE.ZGROUPMEMBER = ZWAGROUPMEMBER.Z_PK
                 WHERE 1=1
                    {f'AND ZWAMESSAGE.ZMESSAGEDATE {filter_date}' if filter_date is not None else ''}
                    {get_chat_condition(filter_chat[0], True, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")}
                    {get_chat_condition(filter_chat[1], False, ["ZCONTACTJID", "ZMEMBERJID"], "ZGROUPINFO", "ios")};""")
    contents = c.fetchall()
    total_row_number = len(contents)
    print(f"\nProcessing vCards...(0/{total_row_number})", end="\r")
    path = f'{media_folder}/Message/vCards'
    Path(path).mkdir(parents=True, exist_ok=True)

    for done, row in enumerate(contents, start=1):
        names = row["ZVCARDNAME"].split("_$!<Name-Separator>!$_")
        cards = row["ZVCARDSTRING"].split("_$!<VCard-Separator>!$_")

        # If this is a list of contacts, the first name is the group name.
        if len(names) > len(cards):
            del names[0]

        links = []
        for name, card in zip(names, cards):
            stem = "".join(filter(str.isalnum, name))
            # Cap the name at 230 UTF-8 bytes, dropping a split character.
            stem = stem.encode('utf-8')[:230].decode('utf-8', 'ignore')
            file_path = os.path.join(path, f"{stem}.vcf")
            if not os.path.isfile(file_path):
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(card)
            links.append(f'<a href="{htmle(file_path)}">{htmle(name)}</a>')

        vcard_summary = "This media include the following vCard file(s):<br>"
        vcard_summary += " | ".join(links)
        message = data[row["ZCONTACTJID"]].messages[row["ZMESSAGE"]]
        message.data = vcard_summary
        message.mime = "text/x-vcard"
        message.media = True
        message.meta = True
        message.safe = True
        print(f"Processing vCards...({done}/{total_row_number})", end="\r")
|
||||
146
Whatsapp_Chat_Exporter/ios_media_handler.py
Normal file
146
Whatsapp_Chat_Exporter/ios_media_handler.py
Normal file
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import shutil
|
||||
import sqlite3
|
||||
import os
|
||||
import getpass
|
||||
from Whatsapp_Chat_Exporter.utility import WhatsAppIdentifier
|
||||
from Whatsapp_Chat_Exporter.bplist import BPListReader
|
||||
try:
|
||||
from iphone_backup_decrypt import EncryptedBackup, RelativePath
|
||||
except ModuleNotFoundError:
|
||||
support_encrypted = False
|
||||
else:
|
||||
support_encrypted = True
|
||||
|
||||
|
||||
def extract_encrypted(base_dir, password, identifiers, decrypt_chunk_size):
    """Decrypt an encrypted iOS backup and extract the WhatsApp files.

    The message and contact databases are extracted first, then every file
    of the ``identifiers.DOMAIN`` domain is extracted into a folder of the
    same name.

    Args:
        base_dir: Directory of the encrypted iOS backup.
        password: Passphrase of the backup.
        identifiers: Object providing ``DOMAIN``, ``MESSAGE`` and
            ``CONTACT``.
        decrypt_chunk_size: Forwarded to ``EncryptedBackup``.

    Returns:
        The ``EncryptedBackup`` instance used.  The process exits with
        status 7 on ``ValueError`` and 6 on ``FileNotFoundError`` while
        extracting the two databases.
    """
    print("Trying to decrypt the iOS backup...", end="")
    backup = EncryptedBackup(
        backup_directory=base_dir,
        passphrase=password,
        cleanup=False,
        check_same_thread=False,
        decrypt_chunk_size=decrypt_chunk_size
    )
    print("Done\nDecrypting WhatsApp database...", end="")
    databases = (
        (RelativePath.WHATSAPP_MESSAGES, identifiers.MESSAGE),
        (RelativePath.WHATSAPP_CONTACTS, identifiers.CONTACT),
    )
    try:
        for relative_path, output_filename in databases:
            backup.extract_file(
                relative_path=relative_path,
                domain_like=identifiers.DOMAIN,
                output_filename=output_filename
            )
    except ValueError:
        print("Failed to decrypt backup: incorrect password?")
        exit(7)
    except FileNotFoundError:
        print("Essential WhatsApp files are missing from the iOS backup.")
        exit(6)
    else:
        print("Done")

    def report_progress(file_id, domain, relative_path, n, total_files):
        # Used as filter callback: prints progress and always returns True.
        if n % 100 == 0:
            print(f"Decrypting and extracting files...({n}/{total_files})", end="\r")
        return True

    backup.extract_files(
        domain_like=identifiers.DOMAIN,
        output_folder=identifiers.DOMAIN,
        preserve_folders=True,
        filter_callback=report_progress
    )
    print(f"All required files are decrypted and extracted.          ", end="\n")
    return backup
|
||||
|
||||
|
||||
def is_encrypted(base_dir):
    """Tell whether the iOS backup in ``base_dir`` is encrypted.

    Args:
        base_dir: Directory of the iOS backup.

    Returns:
        ``True`` when querying ``Manifest.db`` fails with a
        ``sqlite3.DatabaseError`` other than ``OperationalError``,
        ``False`` when the ``Files`` table can be queried.

    Raises:
        sqlite3.OperationalError: These errors cannot be used to determine
            if the backup is encrypted, so they are passed on unchanged.
    """
    from contextlib import closing

    # sqlite3's own context manager only commits or rolls back; closing()
    # makes sure the manifest file handle is released as well.
    with closing(sqlite3.connect(os.path.join(base_dir, "Manifest.db"))) as manifest:
        try:
            manifest.execute("""SELECT count()
                                FROM Files
                             """)
        except sqlite3.OperationalError:
            # Must come before DatabaseError, of which it is a subclass.
            raise
        except sqlite3.DatabaseError:
            return True
        else:
            return False
|
||||
|
||||
|
||||
def extract_media(base_dir, identifiers, decrypt_chunk_size):
    """Extract the WhatsApp databases and files from an iOS backup.

    Encrypted backups are delegated to ``extract_encrypted`` after asking
    for the password.  For plain backups the two databases are copied into
    the current working directory and every manifest entry of the
    ``identifiers.DOMAIN`` domain is recreated under a folder of that name.

    Args:
        base_dir: Directory of the iOS backup.
        identifiers: Object providing ``DOMAIN``, ``MESSAGE`` and
            ``CONTACT``.
        decrypt_chunk_size: Forwarded to ``extract_encrypted``.

    Returns:
        ``False`` when the backup is encrypted but the decryption dependency
        is missing; ``None`` otherwise.  The process exits when the message
        database is not part of a plain backup.
    """
    from contextlib import closing

    if is_encrypted(base_dir):
        if not support_encrypted:
            print("You don't have the dependencies to handle encrypted backup.")
            print("Read more on how to deal with encrypted backup:")
            print("https://github.com/KnugiHK/Whatsapp-Chat-Exporter/blob/main/README.md#usage")
            return False
        print("Encryption detected on the backup!")
        password = getpass.getpass("Enter the password for the backup:")
        extract_encrypted(base_dir, password, identifiers, decrypt_chunk_size)
    else:
        wts_db = os.path.join(base_dir, identifiers.MESSAGE[:2], identifiers.MESSAGE)
        contact_db = os.path.join(base_dir, identifiers.CONTACT[:2], identifiers.CONTACT)
        if not os.path.isfile(wts_db):
            if identifiers is WhatsAppIdentifier:
                print("WhatsApp database not found.")
            else:
                print("WhatsApp Business database not found.")
            exit()
        else:
            shutil.copyfile(wts_db, identifiers.MESSAGE)
        if not os.path.isfile(contact_db):
            print("Contact database not found. Skipping...")
        else:
            shutil.copyfile(contact_db, identifiers.CONTACT)
        _wts_id = identifiers.DOMAIN
        # closing() releases the manifest handle; sqlite3's own context
        # manager would only commit.
        with closing(sqlite3.connect(os.path.join(base_dir, "Manifest.db"))) as manifest:
            manifest.row_factory = sqlite3.Row
            c = manifest.cursor()
            # The domain is bound as a parameter instead of being pasted
            # into the SQL text.
            c.execute(
                """SELECT count()
                   FROM Files
                   WHERE domain = ?""",
                (_wts_id,)
            )
            total_row_number = c.fetchone()[0]
            print(f"Extracting WhatsApp files...(0/{total_row_number})", end="\r")
            c.execute("""SELECT fileID,
                                relativePath,
                                flags,
                                file AS metadata,
                                ROW_NUMBER() OVER(ORDER BY relativePath) AS _index
                         FROM Files
                         WHERE domain = ?
                         ORDER BY relativePath""", (_wts_id,))
            if not os.path.isdir(_wts_id):
                os.mkdir(_wts_id)
            for row in iter(c.fetchone, None):
                if row["relativePath"] == "":
                    continue
                destination = os.path.join(_wts_id, row["relativePath"])
                hashes = row["fileID"]
                flags = row["flags"]
                if flags == 2:
                    try:
                        os.mkdir(destination)
                    except FileExistsError:
                        pass
                elif flags == 1:
                    # The source file sits in a folder named after the first
                    # two characters of its fileID.
                    shutil.copyfile(os.path.join(base_dir, hashes[:2], hashes), destination)
                    # Carry over the modification time recorded in the
                    # manifest (used for both atime and mtime).
                    metadata = BPListReader(row["metadata"]).parse()
                    modification = metadata["$objects"][1]["LastModified"]
                    os.utime(destination, (modification, modification))
                if row["_index"] % 100 == 0:
                    print(f"Extracting WhatsApp files...({row['_index']}/{total_row_number})", end="\r")
            print(f"Extracting WhatsApp files...({total_row_number}/{total_row_number})", end="\n")
|
||||
428
Whatsapp_Chat_Exporter/utility.py
Normal file
428
Whatsapp_Chat_Exporter/utility.py
Normal file
@@ -0,0 +1,428 @@
|
||||
import jinja2
|
||||
import json
|
||||
import os
|
||||
import unicodedata
|
||||
import re
|
||||
import math
|
||||
from bleach import clean as sanitize
|
||||
from markupsafe import Markup
|
||||
from datetime import datetime, timedelta
|
||||
from enum import IntEnum
|
||||
from Whatsapp_Chat_Exporter.data_model import ChatStore
|
||||
try:
    # StrEnum only exists in the standard library from Python 3.11 onwards.
    from enum import StrEnum, IntEnum
except ImportError:
    # < Python 3.11
    # The combined import above fails as a whole when StrEnum is missing,
    # so minimal stand-ins are built on top of the plain Enum base class.
    from enum import Enum

    class StrEnum(str, Enum):
        pass

    # Rebinds the IntEnum name for this module on older interpreters.
    class IntEnum(int, Enum):
        pass
|
||||
|
||||
# Size limit expressed in bytes (4 * 1024 * 1024).
MAX_SIZE = 4 * 1024 * 1024  # Default 4MB
# NOTE(review): 0x3D0 == 976; what a "row" refers to is not visible in this
# part of the file - confirm against the code that consumes ROW_SIZE.
ROW_SIZE = 0x3D0
|
||||
|
||||
|
||||
def convert_time_unit(time_second: int):
    """Turn a duration in seconds into a short human-readable phrase.

    Durations whose ``timedelta`` text contains "day" (one day or more, and
    negative values) are returned as the raw ``str(timedelta)`` text.
    Everything else is phrased from that text, e.g. ``5`` -> ``"5 seconds"``
    and ``90`` -> ``"01:30 minutes"``.
    """
    text = str(timedelta(seconds=time_second))
    if "day" in text:
        return text
    if time_second < 1:
        return "less than a second"
    if time_second == 1:
        return "a second"
    if time_second < 60:
        # text looks like "0:00:SS"; drop the zero padding for single digits.
        skip = 1 if time_second < 10 else 0
        return text[5:][skip:] + " seconds"
    if time_second == 60:
        return "a minute"
    if time_second < 3600:
        # text looks like "0:MM:SS"; keep only "MM:SS".
        return text[2:] + " minutes"
    if time_second == 3600:
        return "an hour"
    return text + " hour"
|
||||
|
||||
|
||||
def bytes_to_readable(size_bytes: int):
    """Format a byte count using binary (1024-based) units.

    From https://stackoverflow.com/a/14822210/9478891
    Authors: james-sapam & other contributors
    Licensed under CC BY-SA 3.0
    See git commit logs for changes, if any.

    Returns "0B" for zero, otherwise "<value rounded to 2 places> <unit>".
    """
    if size_bytes == 0:
        return "0B"
    units = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    # Index of the largest unit that does not exceed the value.
    exponent = int(math.floor(math.log(size_bytes, 1024)))
    scaled = round(size_bytes / math.pow(1024, exponent), 2)
    return f"{scaled} {units[exponent]}"
|
||||
|
||||
|
||||
def readable_to_bytes(size_str: str):
    """Parse a size such as "1024GB", "4 mb" or "512B" into a byte count.

    The unit is case-insensitive and may be separated from the number by
    whitespace. Units are 1024-based, from B up to YB.

    Raises:
        ValueError: if the unit is unknown or the number is not a plain
            non-negative integer.
    """
    SIZE_UNITS = {
        'B': 1,
        'KB': 1024,
        'MB': 1024**2,
        'GB': 1024**3,
        'TB': 1024**4,
        'PB': 1024**5,
        'EB': 1024**6,
        'ZB': 1024**7,
        'YB': 1024**8
    }
    size_str = size_str.upper().strip()
    # Try the two-letter units before the bare "B": every unit ends in "B",
    # and a fixed two-character slice would misread "512B" as unit "2B".
    for unit in sorted(SIZE_UNITS, key=len, reverse=True):
        if size_str.endswith(unit):
            number = size_str[:-len(unit)].strip()
            break
    else:
        raise ValueError("Invalid input for size_str. Example: 1024GB")
    # isdecimal() accepts exactly the digits int() can parse; isnumeric()
    # also accepts characters such as superscripts that int() rejects.
    if not number.isdecimal():
        raise ValueError("Invalid input for size_str. Example: 1024GB")
    return int(number) * SIZE_UNITS[unit]
|
||||
|
||||
|
||||
def sanitize_except(html):
    """Clean *html* with bleach, allowing only ``<br>`` tags, and wrap the
    result in ``Markup``."""
    allowed_tags = ["br"]
    cleaned = sanitize(html, tags=allowed_tags)
    return Markup(cleaned)
|
||||
|
||||
|
||||
def determine_day(last, current):
    """Return the local calendar date of *current* when it differs from the
    date of *last*, otherwise ``None``.

    Both arguments are Unix timestamps, converted with
    ``datetime.fromtimestamp`` (local time).
    """
    previous_date = datetime.fromtimestamp(last).date()
    current_date = datetime.fromtimestamp(current).date()
    return None if previous_date == current_date else current_date
|
||||
|
||||
|
||||
def check_update():
    """Compare the installed version with the latest release listed on PyPI.

    The outcome is printed to stdout.

    Returns:
        1 when the PyPI metadata could not be fetched, 0 otherwise.
    """
    import urllib.request
    import json
    from sys import platform
    from .__init__ import __version__

    package_url_json = "https://pypi.org/pypi/whatsapp-chat-exporter/json"
    try:
        response = urllib.request.urlopen(package_url_json)
    except Exception:
        print("Failed to check for updates.")
        return 1

    with response:
        package_info = json.load(response)

    # Versions are compared as tuples of ints, e.g. "0.9.1" -> (0, 9, 1).
    newest = package_info["info"]["version"]
    latest_version = tuple(int(part) for part in newest.split("."))
    current_version = tuple(int(part) for part in __version__.split("."))

    if current_version >= latest_version:
        print("You are using the latest version of WhatsApp Chat Exporter.")
        return 0

    if platform == "win32":
        hint = "Update with: pip install --upgrade whatsapp-chat-exporter"
    else:
        hint = "Update with: pip3 install --upgrade whatsapp-chat-exporter"
    print("===============Update===============")
    print("A newer version of WhatsApp Chat Exporter is available.")
    print("Current version: " + __version__)
    print("Latest version: " + package_info["info"]["version"])
    print(hint)
    print("====================================")
    return 0
|
||||
|
||||
|
||||
def rendering(
        output_file_name,
        template,
        name,
        msgs,
        contact,
        w3css,
        next,
        chat,
    ):
    """Render one chat with *template* and write it to *output_file_name*.

    The file is written as UTF-8. When the chat has a full-size avatar but no
    thumbnail, the full-size avatar is passed as the thumbnail. The *contact*
    argument is accepted but not used by this function.
    """
    thumbnail = chat.their_avatar_thumb
    if thumbnail is None and chat.their_avatar is not None:
        thumbnail = chat.their_avatar

    context = {
        "name": name,
        "msgs": msgs,
        "my_avatar": chat.my_avatar,
        "their_avatar": chat.their_avatar,
        "their_avatar_thumb": thumbnail,
        "w3css": w3css,
        "next": next,
        "status": chat.status,
        "media_base": chat.media_base,
    }
    with open(output_file_name, "w", encoding="utf-8") as f:
        # Rendering happens after the file is opened, as before.
        f.write(template.render(**context))
|
||||
|
||||
|
||||
class Device(StrEnum):
    """String identifiers for the kinds of input this module distinguishes."""
    IOS = "ios"
    ANDROID = "android"
    # NOTE(review): presumably a chat exported from within the app rather
    # than a device database - confirm against the callers.
    EXPORTED = "exported"
|
||||
|
||||
|
||||
def import_from_json(json_file, data):
    """Load chats from a JSON file into *data*.

    Each top-level key of the JSON document is stored as a key of *data*,
    mapped to a ``ChatStore`` rebuilt from the stored fields, with one
    ``Message`` per entry under "messages". Progress is printed to stdout.

    Args:
        json_file: path of the JSON file to read.
        data: mapping that receives the rebuilt chats (mutated in place).
    """
    from Whatsapp_Chat_Exporter.data_model import ChatStore, Message

    # Optional message fields copied verbatim; missing keys become None.
    optional_fields = (
        "media", "meta", "data", "sender", "safe", "mime",
        "reply", "quoted_data", "caption", "thumb", "sticker",
    )

    with open(json_file, "r") as f:
        temp_data = json.load(f)
    total_row_number = len(temp_data.keys())
    print(f"Importing chats from JSON...(0/{total_row_number})", end="\r")

    for done, (jid, chat_data) in enumerate(temp_data.items(), start=1):
        chat = ChatStore(chat_data.get("type"), chat_data.get("name"))
        for attribute in ("my_avatar", "their_avatar", "their_avatar_thumb", "status"):
            setattr(chat, attribute, chat_data.get(attribute))

        for message_id, msg in chat_data.get("messages").items():
            message = Message(
                msg["from_me"],
                msg["timestamp"],
                msg["time"],
                msg["key_id"],
            )
            for field in optional_fields:
                setattr(message, field, msg.get(field))
            chat.add_message(message_id, message)

        data[jid] = chat
        print(f"Importing chats from JSON...({done}/{total_row_number})", end="\r")
|
||||
|
||||
|
||||
def sanitize_filename(file_name: str):
    """Keep only alphanumeric characters, hyphens and spaces of *file_name*."""
    kept = [char for char in file_name if char in "- " or char.isalnum()]
    return "".join(kept)
|
||||
|
||||
|
||||
def get_file_name(contact: str, chat: ChatStore):
    """Derive the output file name and display name for a chat.

    Args:
        contact: chat identifier; it must contain "@" or be one of the
            special identifiers accepted below.
        chat: chat whose ``name`` (possibly None) is used when available.

    Returns:
        ``(file_name, name)``: the sanitized file name and the display name.
        Without a chat name both are based on the part of *contact* before
        "@". With a chat name the file name is "<number>-<chat name>", or
        only the chat name when *contact* contains "-".

    Raises:
        ValueError: if *contact* has an unexpected format.
    """
    special_ids = ("000000000000000", "000000000000001", "ExportedChat")
    if "@" not in contact and contact not in special_ids:
        raise ValueError("Unexpected contact format: " + contact)

    phone_number = contact.split('@')[0]
    display = chat.name
    if display is None:
        return sanitize_filename(phone_number), phone_number

    # Path separators in the chat name are turned into hyphens.
    safe_name = display.replace("/", "-").replace("\\", "-")
    if "-" in contact or phone_number == "":
        file_name = safe_name
    else:
        file_name = phone_number + "-" + safe_name
    return sanitize_filename(file_name), display
|
||||
|
||||
|
||||
def get_chat_condition(filter, include, columns, jid=None, platform=None):
    """Build the SQL fragment that includes or excludes chats by substring.

    Args:
        filter: iterable of substrings, or None for no filtering.
        include: True for LIKE clauses joined with OR, False for NOT LIKE
            clauses joined with AND.
        columns: column names; columns[0] is always tested, columns[1] is
            tested together with the group check when present.
        jid: expression used for the group check.
        platform: "android" or "ios"; selects the form of the group check.

    Returns:
        "" when *filter* is None, otherwise "AND (<clauses>)".

    Raises:
        ValueError: if *jid* is given with fewer than two columns, or with
            an unsupported *platform*.
    """
    if filter is None:
        return ""

    if len(columns) < 2 and jid is not None:
        raise ValueError("There must be at least two elements in argument columns if jid is not None")
    if jid is not None:
        if platform == "android":
            is_group = f"{jid}.type == 1"
        elif platform == "ios":
            is_group = f"{jid} IS NOT NULL"
        else:
            raise ValueError("Only android and ios are supported for argument platform if jid is not None")

    # NOTE: filter values are interpolated into the SQL text as-is.
    connector, comparison = (" OR", "LIKE") if include else (" AND", "NOT LIKE")
    clauses = []
    for position, chat in enumerate(filter):
        lead = connector if position > 0 else ""
        clauses.append(f"{lead} {columns[0]} {comparison} '%{chat}%'")
        if len(columns) > 1:
            clauses.append(f"{connector} ({columns[1]} {comparison} '%{chat}%' AND {is_group})")
    return f"AND ({' '.join(clauses)})"
|
||||
|
||||
def _is_message_empty(message):
    """Return True when *message* carries no media and its data is None or
    an empty string."""
    if message.media:
        return False
    return message.data is None or message.data == ""
|
||||
|
||||
def chat_is_empty(chat: ChatStore):
    """Return True when *chat* has no messages, or when every message it
    holds is empty according to ``_is_message_empty``."""
    messages = chat.messages
    if len(messages) == 0:
        return True
    return all(_is_message_empty(entry) for entry in messages.values())
|
||||
|
||||
|
||||
# Android Specific
|
||||
# Known "iv"/"db" offset pairs.
# NOTE(review): presumably the positions of the IV and of the encrypted
# database payload inside a crypt14 backup - confirm with the decryption code.
CRYPT14_OFFSETS = (
    {"iv": 67, "db": 191},
    {"iv": 67, "db": 190},
    {"iv": 66, "db": 99},
    {"iv": 67, "db": 193},
    {"iv": 67, "db": 194},
)
|
||||
|
||||
|
||||
class Crypt(IntEnum):
    """Backup format versions; each member's value is its version number."""
    CRYPT15 = 15
    CRYPT14 = 14
    CRYPT12 = 12
|
||||
|
||||
|
||||
class DbType(StrEnum):
    """String identifiers for the two kinds of database: message and contact."""
    MESSAGE = "message"
    CONTACT = "contact"
|
||||
|
||||
|
||||
def brute_force_offset(max_iv=200, max_db=200):
    """Yield every ``(iv, iv + 16, db)`` combination in order.

    *iv* runs over ``range(max_iv)`` in the outer loop and *db* over
    ``range(max_db)`` in the inner loop.
    """
    yield from (
        (iv_start, iv_start + 16, db_start)
        for iv_start in range(0, max_iv)
        for db_start in range(0, max_db)
    )
|
||||
|
||||
|
||||
def determine_metadata(content, init_msg):
    """Build the text shown for a metadata (system) message.

    Args:
        content: mapping with at least "is_me_joined" and "action_type";
            some action types also read "data", "old_jid" and "new_jid".
        init_msg: name of the acting party, or a falsy value for none.

    Returns:
        The message text, or None for action types that are unsupported or
        deliberately not rendered.
    """
    subject = init_msg if init_msg else ""
    if content["is_me_joined"] == 1:  # Override
        return f"You were added into the group by {subject}"

    action = content["action_type"]

    # Action types whose text is appended to the acting party's name.
    appended = {
        4: " was added to the group",
        5: " left the group",
        6: " changed the group icon",
        8: ("WhatsApp Internal Error Occurred: "
            "you cannot send message to this group"),
        9: " created a broadcast channel",
        12: " added someone",  # TODO: Find out who
        14: " removed someone",  # TODO: Find out who
    }
    # Action types with a fixed text that ignores the acting party.
    standalone = {
        7: "You were removed",
        19: "This chat is now end-to-end encrypted",
        20: "Someone joined this group by using a invite link",  # TODO: Find out who
        47: "The contact is an official business account",
        50: "The contact's account type changed from business to standard",
        56: "Messgae timer was enabled/updated/disabled",
        58: "You blocked this contact",
    }

    if action in appended:
        return subject + appended[action]
    if action in standalone:
        return standalone[action]

    if action == 1:
        return subject + f''' changed the group name to "{content['data']}"'''
    if action == 11:
        return subject + f''' created a group with name: "{content['data']}"'''
    if action == 27:
        description = content['data'].replace("\n", '<br>')
        return subject + " changed the group description to:<br>" + description
    if action in (10, 28):
        # Number change: only the part of each JID before "@" is shown.
        try:
            old = content['old_jid'].split('@')[0]
            new = content['new_jid'].split('@')[0]
        except (AttributeError, IndexError):
            return None
        return f"{old} changed their number to {new}"
    if action in (18, 57):
        if subject != "You":
            return f"The security code between you and {subject} changed"
        return "The security code in this chat changed"

    # 13: someone left the group; 15: someone promoted someone as an admin;
    # 46: voice message in PM(?); 67 / 69: contact uses a secure service from
    # Facebook(?). These and every other action type are not rendered.
    return None
|
||||
|
||||
|
||||
def get_status_location(output_folder, offline_static):
    """Return the location of the W3.CSS stylesheet to reference.

    Args:
        output_folder: folder the HTML output is written to.
        offline_static: name of the static sub-folder for offline use, or a
            falsy value to use the hosted stylesheet.

    Returns:
        The hosted stylesheet URL when *offline_static* is falsy. Otherwise
        the path ``<offline_static>/w3.css`` (without *output_folder*), after
        downloading the stylesheet into the output folder if it is missing.
    """
    w3css = "https://www.w3schools.com/w3css/4/w3.css"
    if not offline_static:
        return w3css
    import urllib.request
    static_folder = os.path.join(output_folder, offline_static)
    os.makedirs(static_folder, exist_ok=True)
    w3css_path = os.path.join(static_folder, "w3.css")
    if not os.path.isfile(w3css_path):
        # Download once; later runs reuse the local copy.
        with urllib.request.urlopen(w3css) as resp:
            with open(w3css_path, "wb") as f:
                f.write(resp.read())
    # The missing return here made the offline case yield None.
    return os.path.join(offline_static, "w3.css")
|
||||
|
||||
|
||||
def setup_template(template, no_avatar):
    """Load the Jinja2 template used to render chats.

    Args:
        template: path of a custom template file, or None to use the
            "whatsapp.html" file located next to this module.
        no_avatar: value exposed to the template as the global ``no_avatar``.

    Returns:
        The loaded template, from an auto-escaping environment with
        ``determine_day`` as a global and ``sanitize_except`` as a filter.
    """
    if template is None:
        template_dir, template_file = os.path.dirname(__file__), "whatsapp.html"
    else:
        template_dir, template_file = os.path.split(template)

    environment = jinja2.Environment(
        loader=jinja2.FileSystemLoader(searchpath=template_dir),
        autoescape=True,
    )
    environment.globals["determine_day"] = determine_day
    environment.globals["no_avatar"] = no_avatar
    environment.filters['sanitize_except'] = sanitize_except
    return environment.get_template(template_file)
|
||||
|
||||
# iOS Specific
|
||||
# Unix timestamp of 2001-01-01 00:00, built from a naive datetime.
# NOTE(review): datetime.timestamp() treats a naive datetime as local time,
# so this value depends on the machine's timezone - confirm this is intended.
APPLE_TIME = datetime.timestamp(datetime(2001, 1, 1))
|
||||
|
||||
|
||||
def slugify(value, allow_unicode=False):
    """
    Taken from https://github.com/django/django/blob/master/django/utils/text.py

    Produce a slug from *value*: convert to ASCII unless 'allow_unicode' is
    True, lowercase, drop characters that aren't alphanumerics, underscores,
    hyphens or whitespace, collapse runs of whitespace and dashes into single
    dashes, and strip leading and trailing dashes and underscores.
    """
    text = str(value)
    if allow_unicode:
        text = unicodedata.normalize('NFKC', text)
    else:
        decomposed = unicodedata.normalize('NFKD', text)
        text = decomposed.encode('ascii', 'ignore').decode('ascii')
    without_symbols = re.sub(r'[^\w\s-]', '', text.lower())
    dashed = re.sub(r'[-\s]+', '-', without_symbols)
    return dashed.strip('-_')
|
||||
|
||||
|
||||
class WhatsAppIdentifier(StrEnum):
    """Identifiers for locating WhatsApp data in an iOS backup."""
    # Hashed file name under which the message database is stored.
    MESSAGE = "7c7fba66680ef796b916b067077cc246adacf01d"
    # NOTE(review): presumably the hashed file name of the contact database - confirm.
    CONTACT = "b8548dc30aa1030df0ce18ef08b882cf7ab5212f"
    # Backup domain string of the app's shared container.
    DOMAIN = "AppDomainGroup-group.net.whatsapp.WhatsApp.shared"
|
||||
|
||||
|
||||
class WhatsAppBusinessIdentifier(StrEnum):
    """Same set of identifiers as WhatsAppIdentifier, with different values."""
    # NOTE(review): the "SMB" domain suggests these belong to the WhatsApp
    # Business app - confirm the two hashes against a real backup.
    MESSAGE = "724bd3b98b18518b455a87c1f3ac3a0d189c4466"
    CONTACT = "d7246a707f51ddf8b17ee2dddabd9e0a4da5c552"
    DOMAIN = "AppDomainGroup-group.net.whatsapp.WhatsAppSMB.shared"
|
||||
|
||||
class JidType(IntEnum):
    """Numeric JID type codes."""
    # NOTE(review): presumably a private (one-to-one) chat - confirm.
    PM = 0
    # Same value as the Android group check (`type == 1`) in get_chat_condition.
    GROUP = 1
    SYSTEM_BROADCAST = 5
    STATUS = 11
|
||||
82
Whatsapp_Chat_Exporter/vcards_contacts.py
Normal file
82
Whatsapp_Chat_Exporter/vcards_contacts.py
Normal file
@@ -0,0 +1,82 @@
|
||||
import vobject
|
||||
from typing import List, TypedDict
|
||||
|
||||
|
||||
class ExportedContactNumbers(TypedDict):
    """One contact as collected from a vCard file."""
    # Display name of the contact.
    full_name: str
    # All telephone values found on the card, as written in the file.
    numbers: List[str]
|
||||
|
||||
|
||||
class ContactsFromVCards:
    """Holds (number, name) pairs read from a vCard file and applies the
    names to chats that do not have one yet."""

    def __init__(self) -> None:
        # List of (normalized number, display name) tuples.
        self.contact_mapping = []

    def is_empty(self):
        """Return True while no contacts have been loaded."""
        return len(self.contact_mapping) == 0

    def load_vcf_file(self, vcf_file_path: str, default_country_code: str):
        """Replace the current mapping with the contacts of *vcf_file_path*."""
        loaded = read_vcards_file(vcf_file_path, default_country_code)
        self.contact_mapping = loaded

    def enrich_from_vcards(self, chats):
        """Set ``name`` on each chat whose key starts with a known number
        and whose name is missing or None."""
        for number, name in self.contact_mapping:
            # short number must be a bad contact, lets skip it
            if len(number) <= 5:
                continue

            matching = [chat for jid, chat in chats.items() if jid.startswith(number)]
            for chat in matching:
                if getattr(chat, 'name', None) is None:
                    chat.name = name
|
||||
|
||||
|
||||
def read_vcards_file(vcf_file_path, default_country_code: str):
    """Read a vCard file and return its contacts as (number, name) pairs.

    Cards without a telephone entry, or with neither an ``fn`` nor an ``n``
    field, are skipped. Numbers are normalized by ``map_number_to_name``
    using *default_country_code*.
    """
    contacts = []
    with open(vcf_file_path, mode="r", encoding="utf-8") as f:
        for card in vobject.readComponents(f):
            # Prefer the formatted name, fall back to the structured one.
            if hasattr(card, 'fn'):
                name = str(card.fn.value)
            elif hasattr(card, 'n'):
                name = str(card.n.value)
            else:
                continue
            if not hasattr(card, 'tel'):
                continue
            entry: ExportedContactNumbers = {
                "full_name": name,
                "numbers": [tel.value for tel in card.tel_list],
            }
            contacts.append(entry)

    return map_number_to_name(contacts, default_country_code)
|
||||
|
||||
|
||||
def filter_chats_by_prefix(chats, prefix: str):
    """Return a new dict holding the entries of *chats* whose key starts
    with *prefix*."""
    selected = {}
    for jid, chat in chats.items():
        if jid.startswith(prefix):
            selected[jid] = chat
    return selected
|
||||
|
||||
|
||||
def map_number_to_name(contacts, default_country_code: str):
    """Flatten contacts into a list of ``(normalized number, name)`` tuples.

    A contact with several numbers gets one tuple per number, its name
    suffixed with the 1-based position of the number, e.g. "Ann (2)".
    """
    pairs = []
    for entry in contacts:
        numbers = entry['numbers']
        needs_suffix = len(numbers) > 1
        for position, raw_number in enumerate(numbers, start=1):
            normalized = normalize_number(raw_number, default_country_code)
            if needs_suffix:
                label = f"{entry['full_name']} ({position})"
            else:
                label = entry['full_name']
            pairs.append((normalized, label))
    return pairs
|
||||
|
||||
|
||||
def normalize_number(number: str, country_code: str):
    """Normalize a phone number to digits that start with the country code.

    Args:
        number: phone number as written in the contact; punctuation and
            spaces are ignored.
        country_code: country code (digits only) used when *number* does not
            carry one itself.

    Returns:
        The digits after the international prefix when *number* starts with
        "+" or "00"; otherwise *country_code* followed by the number with a
        single leading zero removed.
    """
    # Clean the number
    number = ''.join(c for c in number if c.isdigit() or c == "+")

    # A number that starts with a + or 00 means it already have a country code
    for prefix in ('+', "00"):
        if number.startswith(prefix):
            return number[len(prefix):]

    # leading zero should be removed
    # (test the number itself; the loop variable above is never '0')
    if number.startswith('0'):
        number = number[1:]
    return country_code + number  # fall back
|
||||
20
Whatsapp_Chat_Exporter/vcards_contacts_test.py
Normal file
20
Whatsapp_Chat_Exporter/vcards_contacts_test.py
Normal file
@@ -0,0 +1,20 @@
|
||||
# from contacts_names_from_vcards import readVCardsFile
|
||||
|
||||
from Whatsapp_Chat_Exporter.vcards_contacts import normalize_number, read_vcards_file
|
||||
|
||||
|
||||
def test_readVCardsFile():
    # Requires a "contacts.vcf" file in the current working directory.
    assert len(read_vcards_file("contacts.vcf", "973")) > 0
|
||||
|
||||
def test_create_number_to_name_dicts():
    # Placeholder: no assertions yet.
    pass
|
||||
|
||||
def test_fuzzy_match_numbers():
    # Placeholder: no assertions yet.
    pass
|
||||
|
||||
def test_normalize_number():
    """Check normalize_number against (input, country code, expected) rows."""
    cases = (
        ('0531234567', '1', '1531234567'),
        ('001531234567', '2', '1531234567'),
        ('+1531234567', '34', '1531234567'),
        ('053(123)4567', '34', '34531234567'),
        ('0531-234-567', '58', '58531234567'),
    )
    for raw, country_code, expected in cases:
        assert normalize_number(raw, country_code) == expected
|
||||
@@ -2,11 +2,10 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Whatsapp - {{ name }}</title>
|
||||
<link rel="stylesheet" href="https://www.w3schools.com/w3css/4/w3.css">
|
||||
<meta charset="UTF-8">
|
||||
<link rel="stylesheet" href="{{w3css}}">
|
||||
<style>
|
||||
@import url('https://fonts.googleapis.com/css2?family=Noto+Sans+HK:wght@300;400&display=swap');
|
||||
html {
|
||||
font-family: 'Noto Sans HK', sans-serif;
|
||||
html, body {
|
||||
font-size: 12px;
|
||||
scroll-behavior: smooth;
|
||||
}
|
||||
@@ -34,54 +33,124 @@
|
||||
img, video {
|
||||
max-width:100%;
|
||||
}
|
||||
a.anchor {
|
||||
display: block;
|
||||
position: relative;
|
||||
top: -100px;
|
||||
visibility: hidden;
|
||||
}
|
||||
div.reply{
|
||||
font-size: 13px;
|
||||
text-decoration: none;
|
||||
}
|
||||
div:target::before {
|
||||
content: '';
|
||||
display: block;
|
||||
height: 115px;
|
||||
margin-top: -115px;
|
||||
visibility: hidden;
|
||||
}
|
||||
div:target {
|
||||
border-style: solid;
|
||||
border-width: 2px;
|
||||
animation: border-blink 0.5s steps(1) 5;
|
||||
border-color: rgba(0,0,0,0)
|
||||
}
|
||||
table {
|
||||
width: 100%;
|
||||
}
|
||||
@keyframes border-blink {
|
||||
0% {
|
||||
border-color: #2196F3;
|
||||
}
|
||||
50% {
|
||||
border-color: rgba(0,0,0,0);
|
||||
}
|
||||
}
|
||||
.avatar {
|
||||
border-radius:50%;
|
||||
overflow:hidden;
|
||||
max-width: 64px;
|
||||
max-height: 64px;
|
||||
}
|
||||
.name {
|
||||
color: #3892da;
|
||||
}
|
||||
.pad-left-10 {
|
||||
padding-left: 10px;
|
||||
}
|
||||
.pad-right-10 {
|
||||
padding-right: 10px;
|
||||
}
|
||||
.reply_link {
|
||||
color: #168acc;
|
||||
}
|
||||
.blue {
|
||||
color: #70777a;
|
||||
}
|
||||
.sticker {
|
||||
max-width: 100px !important;
|
||||
max-height: 100px !important;
|
||||
}
|
||||
</style>
|
||||
<base href="{{ media_base }}" target="_blank">
|
||||
</head>
|
||||
<body>
|
||||
<header class="w3-center w3-top">Chat history with {{ name }}</header>
|
||||
<header class="w3-center w3-top">
|
||||
Chat history with {{ name }}
|
||||
{% if status is not none %}
|
||||
<br>
|
||||
<span class="w3-small">{{ status }}</span>
|
||||
{% endif %}
|
||||
</header>
|
||||
<article class="w3-container">
|
||||
<div class="table" style="width:100%">
|
||||
<div class="table">
|
||||
{% set last = {'last': 946688461.001} %}
|
||||
{% for msg in msgs -%}
|
||||
<div class="w3-row" style="padding-bottom: 10px">
|
||||
<a class="anchor" id="{{ msg.key_id }}"></a>
|
||||
<div class="w3-row w3-padding-small w3-margin-bottom" id="{{ msg.key_id }}">
|
||||
{% if determine_day(last.last, msg.timestamp) is not none %}
|
||||
<div class="w3-center" style="color:#70777c;padding: 10px 0 10px 0;">{{ determine_day(last.last, msg.timestamp) }}</div>
|
||||
<div class="w3-center w3-padding-16 blue">{{ determine_day(last.last, msg.timestamp) }}</div>
|
||||
{% if last.update({'last': msg.timestamp}) %}{% endif %}
|
||||
{% endif %}
|
||||
{% if msg.from_me == true %}
|
||||
<div class="w3-row">
|
||||
<div style="float: left; color:#70777c;">{{ msg.time }}</div>
|
||||
<div style="padding-left: 10px; text-align: right; color: #3892da;">You</div>
|
||||
<div class="w3-left blue">{{ msg.time }}</div>
|
||||
<div class="name w3-right-align pad-left-10">You</div>
|
||||
</div>
|
||||
<div class="w3-row">
|
||||
{% if not no_avatar and my_avatar is not none %}
|
||||
<div class="w3-col m10 l10">
|
||||
<div style="text-align: right;">
|
||||
{% else %}
|
||||
<div class="w3-col m12 l12">
|
||||
{% endif %}
|
||||
<div class="w3-right-align">
|
||||
{% if msg.reply is not none %}
|
||||
<div class="reply">
|
||||
<span style="color: #70777a;">Replying to </span>
|
||||
<a href="#{{msg.reply}}" style="color: #168acc;">"{{ msg.quoted_data or 'media' }}"</a>
|
||||
<span class="blue">Replying to </span>
|
||||
<a href="#{{msg.reply}}" class="reply_link">
|
||||
{% if msg.quoted_data is not none %}
|
||||
"{{msg.quoted_data}}"
|
||||
{% else %}
|
||||
this message
|
||||
{% endif %}
|
||||
</a>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if msg.meta == true or msg.media == false and msg.data is none %}
|
||||
<div style="text-align: center;" class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar">
|
||||
<p>{{ msg.data or 'This message is not supported' }}</p>
|
||||
</div>
|
||||
<div class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar w3-threequarter w3-center">
|
||||
{% if msg.safe %}
|
||||
<p>{{ msg.data | safe or 'Not supported WhatsApp internal message' }}</p>
|
||||
{% else %}
|
||||
<p>{{ msg.data or 'Not supported WhatsApp internal message' }}</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% if msg.caption is not none %}
|
||||
<div class="w3-container">
|
||||
{{ msg.caption | urlize(none, true, '_blank') }}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
{% if msg.media == false %}
|
||||
{{ msg.data | sanitize_except() }}
|
||||
{{ msg.data | sanitize_except() | urlize(none, true, '_blank') }}
|
||||
{% else %}
|
||||
{% if "image/" in msg.mime %}
|
||||
<a href="{{ msg.data }}"><img src="{{ msg.data }}" /></a>
|
||||
<a href="{{ msg.data }}">
|
||||
<img src="{{ msg.thumb if msg.thumb is not none else msg.data }}" {{ 'class="sticker"' | safe if msg.sticker }} loading="lazy"/>
|
||||
</a>
|
||||
{% elif "audio/" in msg.mime %}
|
||||
<audio controls="controls" autobuffer="autobuffer">
|
||||
<source src="{{ msg.data }}" />
|
||||
@@ -91,53 +160,87 @@
|
||||
<source src="{{ msg.data }}" />
|
||||
</video>
|
||||
{% elif "/" in msg.mime %}
|
||||
<div style="text-align: center;" class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar">
|
||||
<p>The file cannot be displayed here, however it should be located at {{ msg.data }}</p>
|
||||
<div class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar w3-threequarter w3-center">
|
||||
<p>The file cannot be displayed here, however it should be located at <a href="./{{ msg.data }}">here</a></p>
|
||||
</div>
|
||||
{% else %}
|
||||
{% filter escape %}{{ msg.data }}{% endfilter %}
|
||||
{% endif %}
|
||||
{% if msg.caption is not none %}
|
||||
<br>
|
||||
{{ msg.caption }}
|
||||
<div class="w3-container">
|
||||
{{ msg.caption | urlize(none, true, '_blank') }}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
<div class="w3-col m2 l2" style="padding-left: 10px"><img src="{{ my_avatar }}" onerror="this.style.display='none'"></div>
|
||||
{% if not no_avatar and my_avatar is not none %}
|
||||
<div class="w3-col m2 l2 pad-left-10">
|
||||
<a href="{{ my_avatar }}">
|
||||
<img src="{{ my_avatar }}" onerror="this.style.display='none'" class="avatar" loading="lazy">
|
||||
</a>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="w3-row">
|
||||
<div style="padding-right: 10px; float: left; color: #3892da;">
|
||||
<div class="w3-left pad-right-10 name">
|
||||
{% if msg.sender is not none %}
|
||||
{{ msg.sender }}
|
||||
{% else %}
|
||||
{{ name }}
|
||||
{% endif %}
|
||||
</div>
|
||||
<div style="text-align: right; color:#70777c;">{{ msg.time }}</div>
|
||||
<div class="w3-right-align blue">{{ msg.time }}</div>
|
||||
</div>
|
||||
<div class="w3-row">
|
||||
<div class="w3-col m2 l2"><img src="{{ their_avatar }}" onerror="this.style.display='none'"></div>
|
||||
{% if not no_avatar %}
|
||||
<div class="w3-col m2 l2">
|
||||
{% if their_avatar is not none %}
|
||||
<a href="{{ their_avatar }}"><img src="{{ their_avatar_thumb or '' }}" onerror="this.style.display='none'" class="avatar" loading="lazy"></a>
|
||||
{% else %}
|
||||
<img src="{{ their_avatar_thumb or '' }}" onerror="this.style.display='none'" class="avatar" loading="lazy">
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="w3-col m10 l10">
|
||||
<div style="text-align: left;">
|
||||
{% else %}
|
||||
<div class="w3-col m12 l12">
|
||||
{% endif %}
|
||||
<div class="w3-left-align">
|
||||
{% if msg.reply is not none %}
|
||||
<div class="reply">
|
||||
<span style="color: #70777a;">Replying to </span>
|
||||
<a href="#{{msg.reply}}" style="color: #168acc;">"{{ msg.quoted_data or 'media' }}"</a>
|
||||
<span class="blue">Replying to </span>
|
||||
<a href="#{{msg.reply}}" class="reply_link">
|
||||
{% if msg.quoted_data is not none %}
|
||||
"{{msg.quoted_data}}"
|
||||
{% else %}
|
||||
this message
|
||||
{% endif %}
|
||||
</a>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if msg.meta == true or msg.media == false and msg.data is none %}
|
||||
<div style="text-align: center;" class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar">
|
||||
<p>{{ msg.data or 'This message is not supported' }}</p>
|
||||
<div class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar w3-threequarter w3-center">
|
||||
{% if msg.safe %}
|
||||
<p>{{ msg.data | safe or 'Not supported WhatsApp internal message' }}</p>
|
||||
{% else %}
|
||||
<p>{{ msg.data or 'Not supported WhatsApp internal message' }}</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% if msg.caption is not none %}
|
||||
<div class="w3-container">
|
||||
{{ msg.caption | urlize(none, true, '_blank') }}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
{% if msg.media == false %}
|
||||
{{ msg.data | sanitize_except() }}
|
||||
{{ msg.data | sanitize_except() | urlize(none, true, '_blank') }}
|
||||
{% else %}
|
||||
{% if "image/" in msg.mime %}
|
||||
<a href="{{ msg.data }}"><img src="{{ msg.data }}" /></a>
|
||||
<a href="{{ msg.data }}">
|
||||
<img src="{{ msg.thumb if msg.thumb is not none else msg.data }}" {{ 'class="sticker"' | safe if msg.sticker }} loading="lazy"/>
|
||||
</a>
|
||||
{% elif "audio/" in msg.mime %}
|
||||
<audio controls="controls" autobuffer="autobuffer">
|
||||
<source src="{{ msg.data }}" />
|
||||
@@ -147,15 +250,16 @@
|
||||
<source src="{{ msg.data }}" />
|
||||
</video>
|
||||
{% elif "/" in msg.mime %}
|
||||
<div style="text-align: center;" class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar">
|
||||
<p>The file cannot be displayed here, however it should be located at {{ msg.data }}</p>
|
||||
<div class="w3-panel w3-border-blue w3-pale-blue w3-rightbar w3-leftbar w3-threequarter w3-center">
|
||||
<p>The file cannot be displayed here, however it should be located at <a href="./{{ msg.data }}">here</a></p>
|
||||
</div>
|
||||
{% else %}
|
||||
{% filter escape %}{{ msg.data }}{% endfilter %}
|
||||
{% endif %}
|
||||
{% if msg.caption is not none %}
|
||||
<br>
|
||||
{{ msg.caption }}
|
||||
<div class="w3-container">
|
||||
{{ msg.caption | urlize(none, true, '_blank') }}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
@@ -168,7 +272,11 @@
|
||||
</div>
|
||||
</article>
|
||||
<footer class="w3-center">
|
||||
{% if next %}
|
||||
<a href="./{{ next }}">Next</a>
|
||||
{% else %}
|
||||
End of history
|
||||
{% endif %}
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1 +0,0 @@
|
||||
theme: jekyll-theme-cayman
|
||||
20
docs.html
Normal file
20
docs.html
Normal file
@@ -0,0 +1,20 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="refresh" content="0; url='https://github.com/KnugiHK/WhatsApp-Chat-Exporter/wiki'" />
|
||||
<script type="text/javascript">
    // Maps the short ?dest= keys to wiki page paths.
    // The "null" entry covers a missing ?dest= parameter (get() returns null).
    const destination = {
        "filter": "Filter",
        "date": "Filters#date-filters",
        "chat": "Filters#chat-filter",
        "osl": "Open-Source-Licenses",
        null: ""
    };
    const dest = new URLSearchParams(window.location.search).get('dest');
    // Unknown keys fall back to the wiki home page instead of ".../wiki/undefined".
    window.location.href = `https://github.com/KnugiHK/WhatsApp-Chat-Exporter/wiki/${destination[dest] || ""}`;
</script>
|
||||
</head>
|
||||
<body>
|
||||
<p>If the redirection doesn't work, you can find the documentation at <a href="https://github.com/KnugiHK/WhatsApp-Chat-Exporter/wiki">https://github.com/KnugiHK/WhatsApp-Chat-Exporter/wiki</a>.</p>
|
||||
</body>
|
||||
</html>
|
||||
BIN
imgs/android_structure_backup_crypt15.png
Normal file
BIN
imgs/android_structure_backup_crypt15.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 12 KiB |
BIN
imgs/group.png
BIN
imgs/group.png
Binary file not shown.
|
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 15 KiB |
BIN
imgs/pm.png
BIN
imgs/pm.png
Binary file not shown.
|
Before Width: | Height: | Size: 38 KiB After Width: | Height: | Size: 126 KiB |
@@ -1,34 +0,0 @@
|
||||
# Whatsapp-Chat-Exporter
|
||||
A Whatsapp database parser that will give you the history of your Whatsapp conversations in HTML and JSON
|
||||
**If you plan to uninstall WhatsApp or delete your WhatsApp account, please make a backup of your WhatsApp database. You may want to use this exporter again on the same database in the future as the exporter develops**
|
||||
|
||||
# Usage
|
||||
First, clone this repo, and copy all py and html files to a working directory if you want to do so.
|
||||
```shell
|
||||
git clone https://github.com/KnugiHK/Whatsapp-Chat-Exporter.git
|
||||
```
|
||||
Then, get your WhatsApp database ready and place it in the root of the working directory.
|
||||
* For Android, it is called msgstore.db. If you want the names of your contacts, also get the contact database, which is called wa.db.
|
||||
* For iPhone, it is called 7c7fba66680ef796b916b067077cc246adacf01d (YES, a hash).
|
||||
|
||||
Next, ready your media folder, place it in the root of working directory.
|
||||
* For Android, copy the WhatsApp directory from your phone directly.
|
||||
* For iPhone, run the extract_iphone_media.py, and you will get a folder called Message.
|
||||
```
|
||||
python extract_iphone_media.py "C:\Users\[Username]\AppData\Roaming\Apple Computer\MobileSync\Backup\[device id]"
|
||||
```
|
||||
And now, you should have something like this:
|
||||
|
||||

|
||||
|
||||
Finally, run the script that matches your type of phone.
|
||||
```
|
||||
python extract.py & :: Android
|
||||
python extract_iphone.py & :: iPhone
|
||||
```
|
||||
And you will get these:
|
||||
#### Private Message
|
||||

|
||||
|
||||
#### Group Message
|
||||

|
||||
48
scripts/bruteforce_crypt15.py
Normal file
48
scripts/bruteforce_crypt15.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import hmac
|
||||
import javaobj
|
||||
import zlib
|
||||
from Crypto.Cipher import AES
|
||||
from hashlib import sha256
|
||||
|
||||
|
||||
def _generate_hmac_of_hmac(key_stream):
    """Derive a key from *key_stream* with two chained HMAC-SHA256 passes.

    The first pass keys HMAC-SHA256 with 32 zero bytes and authenticates
    ``key_stream``. The resulting digest is then used as the key of a second
    HMAC-SHA256 over the fixed label ``b"backup encryption\\x01"``.

    Args:
        key_stream: Raw key bytes to derive from.

    Returns:
        A ``(derived_key, key_stream)`` tuple: the 32-byte digest of the
        second HMAC, and the unmodified input.
    """
    zero_key = bytes(32)
    inner_digest = hmac.new(zero_key, key_stream, sha256).digest()
    outer = hmac.new(inner_digest, b"backup encryption\x01", sha256)
    return outer.digest(), key_stream
|
||||
|
||||
|
||||
def _extract_encrypted_key(keyfile):
    """Decode a serialized Java key file and derive the backup key from it.

    ``keyfile`` is deserialized with ``javaobj.loads``; each element it yields
    is packed as one signed byte and the bytes are concatenated in order.
    The result is passed to ``_generate_hmac_of_hmac``.

    Args:
        keyfile: Raw contents of the key file (bytes).

    Returns:
        Whatever ``_generate_hmac_of_hmac`` returns for the decoded key
        stream, i.e. a ``(derived_key, key_stream)`` tuple.
    """
    # Elements are packed as signed single bytes; to_bytes raises
    # OverflowError for anything outside -128..127.
    signed_bytes = (
        value.to_bytes(1, "big", signed=True)
        for value in javaobj.loads(keyfile)
    )
    return _generate_hmac_of_hmac(b"".join(signed_bytes))
|
||||
|
||||
|
||||
# Brute-force search for where the IV and the ciphertext start inside a
# crypt15 backup. Every candidate IV position and every candidate ciphertext
# offset below MAX_SEARCH is tried; a hit is a payload that zlib can inflate
# and whose plaintext starts with the bytes b"SQLite".

# Upper bound (exclusive) for both the IV position and the ciphertext offset.
MAX_SEARCH = 100

# Read both inputs through context managers so the handles are closed.
with open("encrypted_backup.key", "rb") as key_file:
    key = key_file.read()
with open("wa.db.crypt15", "rb") as database_file:
    database = database_file.read()

main_key, hex_key = _extract_encrypted_key(key)

for i in range(MAX_SEARCH):
    iv = database[i:i + 16]
    for j in range(MAX_SEARCH):
        # A fresh cipher object is created for every attempt.
        cipher = AES.new(main_key, AES.MODE_GCM, iv)
        db_ciphertext = database[j:]
        # decrypt() without verify(): the GCM tag is not checked here;
        # success is judged solely by the zlib/SQLite checks below.
        db_compressed = cipher.decrypt(db_ciphertext)
        try:
            db = zlib.decompress(db_compressed)
        except zlib.error:
            # Wrong IV/offset combination: output is not a zlib stream.
            continue
        if db[0:6] == b"SQLite":
            print(f"Found!\nIV: {i}\nOffset: {j}")
            print(db_compressed[:10])
            # Same effect as exit() (status 0) without relying on the
            # helper that the `site` module injects.
            raise SystemExit

print("Not found! Try to increase maximum search.")
|
||||
37
setup.py
37
setup.py
@@ -11,11 +11,20 @@ setuptools.setup(
|
||||
name="whatsapp-chat-exporter",
|
||||
version=version,
|
||||
author="KnugiHK",
|
||||
author_email="info@knugi.com",
|
||||
description="A Whatsapp database parser that will give you the "
|
||||
"history of your Whatsapp conversations in HTML and JSON.",
|
||||
author_email="hello@knugi.com",
|
||||
description=("A Whatsapp database parser that will give you the "
|
||||
"history of your Whatsapp conversations in HTML and JSON. "
|
||||
"Android, iOS, iPadOS, Crypt12, Crypt14, Crypt15 supported."),
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
license="MIT",
|
||||
keywords=[
|
||||
"android", "ios", "parsing", "history", "iphone", "message", "crypt15",
|
||||
"customizable", "whatsapp", "android-backup", "messages", "crypt14",
|
||||
"crypt12", "whatsapp-chat-exporter", "whatsapp-export", "iphone-backup",
|
||||
"whatsapp-database", "whatsapp-database-parser", "whatsapp-conversations"
|
||||
],
|
||||
platforms=["any"],
|
||||
url="https://github.com/KnugiHK/Whatsapp-Chat-Exporter",
|
||||
packages=setuptools.find_packages(),
|
||||
package_data={
|
||||
@@ -23,9 +32,10 @@ setuptools.setup(
|
||||
},
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
"Programming Language :: Python :: 3.7",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
"Development Status :: 4 - Beta",
|
||||
@@ -35,17 +45,26 @@ setuptools.setup(
|
||||
"Topic :: Utilities",
|
||||
"Topic :: Database"
|
||||
],
|
||||
python_requires='>=3.7',
|
||||
python_requires='>=3.8',
|
||||
install_requires=[
|
||||
'jinja2',
|
||||
'bleach'
|
||||
'jinja2',
|
||||
'bleach'
|
||||
],
|
||||
extras_require={
|
||||
'android_backup': ["pycryptodome"]
|
||||
'android_backup': ["pycryptodome", "javaobj-py3"],
|
||||
'crypt12': ["pycryptodome"],
|
||||
'crypt14': ["pycryptodome"],
|
||||
'crypt15': ["pycryptodome", "javaobj-py3"],
|
||||
'all': ["pycryptodome", "javaobj-py3", "vobject"],
|
||||
'everything': ["pycryptodome", "javaobj-py3", "vobject"],
|
||||
'backup': ["pycryptodome", "javaobj-py3"],
|
||||
'vcards': ["vobject", "pycryptodome", "javaobj-py3"],
|
||||
},
|
||||
entry_points={
|
||||
"console_scripts": [
|
||||
"wtsexporter = Whatsapp_Chat_Exporter.__main__:main"
|
||||
"wtsexporter = Whatsapp_Chat_Exporter.__main__:main",
|
||||
"waexporter = Whatsapp_Chat_Exporter.__main__:main",
|
||||
"whatsapp-chat-exporter = Whatsapp_Chat_Exporter.__main__:main"
|
||||
]
|
||||
}
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user