Browse Source

add backup as input, various other changes

main
Andreas Demmelbauer 1 week ago
parent
commit
8fc33808b1
3 changed files with 149 additions and 78 deletions
  1. +4
    -1
      .gitignore
  2. +3
    -7
      README.md
  3. +142
    -70
      whatsapp_exporter.py

+ 4
- 1
.gitignore View File

@@ -1,3 +1,6 @@
_html_export/
Messages/
ChatStorage.sqlite
iphone-backup/
output/
_html_export/
*.sqlite

+ 3
- 7
README.md View File

@@ -1,10 +1,6 @@
For generating the HTML Archive, you need the following:
* `Messages` directory - Containing all Media Files (e. g. from WhatsApp Backup)
* `ChatStorage.sqlite` - The Database containing all Chats (e. g. from iPhone Backup)
You need an unencrypted iOS backup

Place them next to the Script.

Then run:
run:
```
python3 whatsapp_viewer.py ChatStorage.sqlite Messages
python3 whatsapp_exporter.py --backup-path=./iphone-backup/46de1f4ca4a30b155985910d009edaf586236798/ --output=./output/
```

whatsapp_viewer.py → whatsapp_exporter.py View File

@@ -1,7 +1,7 @@
# WhatsApp Chat Viewer
#
# This script reads a WhatsApp ChatStorage.sqlite database and associated media files
# to generate a browsable HTML representation of your chats.
# to generate a browsable HTML archive of chat conversations.
#
# Author: Gemini
# Date: September 7, 2025
@@ -28,46 +28,33 @@ def convert_whatsapp_timestamp(ts):
except (ValueError, TypeError):
return "Invalid date"

def get_media_tag(media_path, media_root_dir, output_dir):
def get_media_tag(media_path, output_dir):
"""Generates the appropriate HTML tag for a given media file and copies it."""
if not media_path:
return ""

# Path in the DB is often relative like 'Media/WhatsApp Images/IMG-...'
full_media_path = os.path.join(media_root_dir, os.path.basename(media_path))
test_path = os.path.join(output_dir, 'Message', media_path)
full_media_path = ''
if not os.path.exists(test_path):
return f'<div class="media-missing">Media not found: {html.escape(test_path)}</div>'
# Sometimes the path is nested inside a subdirectory within the main Media folder
if not os.path.exists(full_media_path):
full_media_path = os.path.join(media_root_dir, media_path)

if not os.path.exists(full_media_path):
return f'<div class="media-missing">Media not found: {html.escape(media_path)}</div>'

# Create a unique-ish path to avoid filename collisions
relative_media_path = os.path.join('media', os.path.basename(media_path))
dest_path = os.path.join(output_dir, relative_media_path)
os.makedirs(os.path.dirname(dest_path), exist_ok=True)
if not os.path.exists(dest_path):
try:
shutil.copy(full_media_path, dest_path)
except Exception as e:
return f'<div class="media-missing">Error copying media: {html.escape(str(e))}</div>'

full_media_path = os.path.join('Message', media_path)
# remove ./ in the beginning if present
full_media_path = full_media_path.lstrip('./')

ext = os.path.splitext(media_path)[1].lower()
if ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp']:
return f'<img src="{relative_media_path}" alt="Image" class="media-item">'
return f'<img src="../{full_media_path}" loading="lazy" alt="Image" class="media-item">'
elif ext in ['.mp4', '.mov', '.webm']:
return f'<video controls src="{relative_media_path}" class="media-item"></video>'
return f'<video controls src="../{full_media_path}" class="media-item" loading="lazy"></video>'
elif ext in ['.mp3', '.ogg', '.opus', '.m4a']:
return f'<audio controls src="{relative_media_path}"></audio>'
return f'<audio controls src="../{full_media_path}" loading="lazy"></audio>'
else:
return f'<a href="{relative_media_path}" target="_blank">View Media: {os.path.basename(media_path)}</a>'
return f'<a href="../{full_media_path}" target="_blank">View Media: {os.path.basename(media_path)}</a>'

def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_group):
def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_group, contact_jid):
"""Generates an HTML file for a single chat session."""
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
@@ -82,7 +69,8 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g
g.ZCONTACTNAME AS GroupMemberContactName,
cs.ZPARTNERNAME AS ChatPartnerName,
p.ZPUSHNAME AS ProfilePushName,
mi.ZMEDIALOCALPATH
mi.ZMEDIALOCALPATH,
cs.ZCONTACTJID AS ChatJID
FROM
ZWAMESSAGE m
LEFT JOIN
@@ -107,36 +95,42 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g
print(f"No messages found for chat: {chat_name}")
return

# Sanitize chat name for filename, allowing emojis
safe_filename = "".join(c for c in chat_name if (
c.isalnum() or
c in (' ', '-') or
'\U0001F300' <= c <= '\U0001FAFF' # Unicode range for most emojis
)).rstrip()
# Sanitize contact_jid for a unique and safe filename
if contact_jid:
safe_filename = "".join(c if c.isalnum() else "_" for c in contact_jid)
else:
# Fallback to chat_id if contact_jid is not available
safe_filename = str(chat_id)

chats_dir = os.path.join(output_dir, "chats")
os.makedirs(chats_dir, exist_ok=True)
html_filename = os.path.join(chats_dir, f"{safe_filename}.html")

with open(html_filename, 'w', encoding='utf-8') as f:
f.write(f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Chat with {html.escape(chat_name)}</title>
<style>
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Chat with {html.escape(chat_name)}</title>
<style>
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
background-color: #e5ddd5;
margin: 0;
padding: 20px;
color: #111b21;
min-height: 100vh;
box-sizing: border-box;
}}
.chat-container {{
max-width: 800px;
margin: auto;
background-image: url('https://user-images.githubusercontent.com/15075759/28719144-86dc0f70-73b1-11e7-911d-60d70fcded21.png'); /* Subtle background pattern */
background-image: url('../current_wallpaper.jpg');
background-size: auto 100%;
background-attachment: fixed;
background-position: center;
border-radius: 8px;
box-shadow: 0 1px 1px 0 rgba(0,0,0,0.06), 0 2px 5px 0 rgba(0,0,0,0.06);
overflow: hidden;
@@ -148,6 +142,14 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g
font-size: 1.2em;
text-align: center;
}}


.chat-header-id {{
font-size: 0.7em;
opacity: 0.8;
margin-top: 5px;
}}
.chat-box {{
padding: 20px;
display: flex;
@@ -197,16 +199,19 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g
padding: 10px;
border-radius: 8px;
}}
</style>
</head>
<body>
<div class="chat-container">
<div class="chat-header">{html.escape(chat_name)}</div>
</style>
</head>
<body>
<div class="chat-container">
<div class="chat-header">
{html.escape(chat_name)}
<div class="chat-header-id">{contact_jid}</div>
</div>
<div class="chat-box">
""")

# Write messages
for is_from_me, text, timestamp, from_jid, group_member_contact_name, chat_partner_name, profile_push_name, media_local_path in messages:
for is_from_me, text, timestamp, from_jid, group_member_contact_name, chat_partner_name, profile_push_name, media_local_path, contact_jid in messages:
msg_class = "sent" if is_from_me else "received"
f.write(f'<div class="message {msg_class}">')
@@ -241,8 +246,9 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g
f.write(f'<div>{escaped_text.replace(chr(10), "<br>")}</div>')
if media_local_path:
f.write(get_media_tag(media_local_path, media_path, output_dir))
# print("Media path:", media_local_path)
f.write(get_media_tag(media_local_path, output_dir))

f.write(f'<div class="timestamp">{convert_whatsapp_timestamp(timestamp)}</div>')
f.write('</div>')

@@ -256,25 +262,66 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g
print(f"Successfully generated HTML for: {chat_name}")


# Step: iPhone backup manifest.db processing
def process_iphone_backup(backup_path, output_dir):
    """Copy the WhatsApp shared files of an iPhone backup into output_dir.

    Reads ``Manifest.db`` in ``backup_path``, selects every row of its
    ``Files`` table whose domain is the WhatsApp shared app group, and copies
    the blob stored at ``<backup_path>/<fileID[:2]>/<fileID>`` to
    ``<output_dir>/<relativePath>``, recreating the relative directory layout.

    Args:
        backup_path: Directory of the iPhone backup (must contain Manifest.db).
        output_dir: Directory under which the file structure is recreated.

    Returns:
        None. Problems (missing manifest, missing source blob, copy failure)
        are reported on stdout and do not abort the run.
    """
    manifest_db_path = os.path.join(backup_path, 'Manifest.db')
    if not os.path.exists(manifest_db_path):
        print(f"Manifest.db not found in backup path: {manifest_db_path}")
        return

    # Fetch everything up front and close the connection right away so the
    # database handle is not held open (or leaked) during the copy phase.
    conn = sqlite3.connect(manifest_db_path)
    try:
        cursor = conn.cursor()
        cursor.execute(
            "SELECT fileID, domain, relativePath FROM Files WHERE domain = ?",
            ('AppDomainGroup-group.net.whatsapp.WhatsApp.shared',),
        )
        files = cursor.fetchall()
    finally:
        conn.close()
    print(f"Found {len(files)} WhatsApp shared files in manifest.db.")

    # Recreate the file structure below output_dir.
    for fileID, _domain, relativePath in files:
        src_file = os.path.join(backup_path, fileID[:2], fileID)
        dest_file = os.path.join(output_dir, relativePath)

        # dirname is '' when output_dir is empty and relativePath has no
        # directory part; os.makedirs('') would raise, so skip it then.
        dest_dir = os.path.dirname(dest_file)
        if dest_dir:
            os.makedirs(dest_dir, exist_ok=True)

        if not os.path.exists(src_file):
            print(f"Source file missing: {src_file}")
            continue

        # Never overwrite a file that is already present in the output.
        if os.path.exists(dest_file):
            continue

        try:
            shutil.copy2(src_file, dest_file)
        except OSError as e:
            # Best effort: report the failure and carry on with the rest.
            print(f"Error copying {src_file} to {dest_file}: {e}")


def main():
parser = argparse.ArgumentParser(description="WhatsApp Chat Exporter")
parser.add_argument("db_path", help="Path to the ChatStorage.sqlite file.")
parser.add_argument("media_path", help="Path to the root 'Media' directory.")
parser.add_argument("--output", default="_html_export", help="Directory to save the HTML files.")
parser.add_argument("--backup-path", default=None, help="Path to iPhone backup directory (for manifest.db processing)")
args = parser.parse_args()

if not os.path.exists(args.db_path):
print(f"Error: Database file not found at '{args.db_path}'")
if args.backup_path:
process_iphone_backup(args.backup_path, args.output)
# Use backup paths for archive creation
db_path = os.path.join(args.output, "ChatStorage.sqlite")
media_path = os.path.join(args.output, "Message/")
else:
parser.add_argument("db_path", help="Path to the ChatStorage.sqlite file.")
parser.add_argument("media_path", help="Path to the root 'Media' directory.")
args = parser.parse_args()
db_path = args.db_path
media_path = args.media_path

if not os.path.exists(db_path):
print(f"Error: Database file not found at '{db_path}'")
return

if not os.path.exists(args.media_path):
print(f"Error: Media directory not found at '{args.media_path}'")
if not os.path.exists(media_path):
print(f"Error: Media directory not found at '{media_path}'")
return

os.makedirs(args.output, exist_ok=True)

conn = sqlite3.connect(args.db_path)
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
# Get all chats, joining with ZWAPROFILEPUSHNAME and using COALESCE to get the best possible name.
@@ -285,15 +332,22 @@ def main():
cs.ZCONTACTJID,
cs.ZMESSAGECOUNTER,
MIN(m.ZMESSAGEDATE) as FirstMessageDate,
MAX(m.ZMESSAGEDATE) as LastMessageDate
MAX(m.ZMESSAGEDATE) as LastMessageDate,
COALESCE(gi.ZPICTUREPATH, pic.ZPATH) AS AvatarPath
FROM
ZWACHATSESSION cs
LEFT JOIN
ZWAPROFILEPUSHNAME p ON cs.ZCONTACTJID = p.ZJID
LEFT JOIN
ZWAMESSAGE m ON cs.Z_PK = m.ZCHATSESSION
LEFT JOIN
ZWAGROUPINFO gi ON cs.ZGROUPINFO = gi.Z_PK
LEFT JOIN
ZWAPROFILEPICTUREITEM pic ON cs.ZCONTACTJID = pic.ZJID
WHERE
cs.ZCONTACTJID NOT LIKE '%@status'
GROUP BY
cs.Z_PK, ChatName, cs.ZCONTACTJID, cs.ZMESSAGECOUNTER
cs.Z_PK, ChatName, cs.ZCONTACTJID, cs.ZMESSAGECOUNTER, AvatarPath
ORDER BY
LastMessageDate DESC NULLS LAST, ChatName
""")
@@ -426,22 +480,40 @@ def main():
<div class="container">
<ul>
""")
for chat_id, chat_name, contact_jid, message_count, first_message_date, last_message_date in chats:
for chat_id, chat_name, contact_jid, message_count, first_message_date, last_message_date, avatar_path in chats:
if not chat_name:
chat_name = f"Unknown Chat ({contact_jid or chat_id})"
full_avatar_path = avatar_path if avatar_path and os.path.isabs(avatar_path) else os.path.join(args.output, avatar_path) if avatar_path else None

# Find all file paths in args.output that start with full_avatar_path
matching_files = []
if full_avatar_path:
for root, dirs, files in os.walk(args.output):
for file in files:
file_path = os.path.join(root, file)
if file_path.startswith(full_avatar_path):
matching_files.append(file_path)

# Use the first matching file if available
if matching_files:
avatar_path = os.path.relpath(matching_files[0], args.output)
full_avatar_path = matching_files[0]
# A group chat JID typically ends with '@g.us'
is_group = contact_jid and '@g.us' in contact_jid
# Allow alphanumeric, spaces, hyphens, and emojis in filename
safe_filename = "".join(c for c in chat_name if (
c.isalnum() or
c in (' ', '-') or
'\U0001F300' <= c <= '\U0001FAFF' # Unicode range for most emojis
)).rstrip()
# Sanitize contact_jid for a unique and safe filename
if contact_jid:
safe_filename = "".join(c if c.isalnum() else "_" for c in contact_jid)
else:
# Fallback to chat_id if contact_jid is not available
safe_filename = str(chat_id)

# Add default avatar based on chat type
avatar_html = f'<div class="chat-avatar default-{"group" if is_group else "individual"}"></div>'
if avatar_path and os.path.exists(full_avatar_path):
avatar_html = f'<div class="chat-avatar" style="background-image: url(\'{avatar_path}\');"></div>'
else:
avatar_html = f'<div class="chat-avatar default-{"group" if is_group else "individual"}"></div>'
# Format date range
date_range = ""
@@ -455,8 +527,8 @@ def main():
if message_count > 0:
# Generate chat HTML only for chats with messages
generate_html_chat(args.db_path, args.media_path, args.output, chat_id, chat_name, is_group)
generate_html_chat(db_path, media_path, args.output, chat_id, chat_name, is_group, contact_jid)
# Clickable entry with link
index_f.write(
f'<li><a class="chat-entry" href="chats/{html.escape(safe_filename)}.html">'

Loading…
Cancel
Save