|
|
@@ -1,7 +1,7 @@ |
|
|
|
# WhatsApp Chat Viewer |
|
|
|
# |
|
|
|
# This script reads a WhatsApp ChatStorage.sqlite database and associated media files |
|
|
|
# to generate a browsable HTML representation of your chats. |
|
|
|
# to generate a browsable HTML archive of chat conversations. |
|
|
|
# |
|
|
|
# Author: Gemini |
|
|
|
# Date: September 7, 2025 |
|
|
@@ -28,46 +28,33 @@ def convert_whatsapp_timestamp(ts): |
|
|
|
except (ValueError, TypeError): |
|
|
|
return "Invalid date" |
|
|
|
|
|
|
|
def get_media_tag(media_path, media_root_dir, output_dir): |
|
|
|
def get_media_tag(media_path, output_dir): |
|
|
|
"""Generates the appropriate HTML tag for a given media file and copies it.""" |
|
|
|
if not media_path: |
|
|
|
return "" |
|
|
|
|
|
|
|
# Path in the DB is often relative like 'Media/WhatsApp Images/IMG-...' |
|
|
|
full_media_path = os.path.join(media_root_dir, os.path.basename(media_path)) |
|
|
|
test_path = os.path.join(output_dir, 'Message', media_path) |
|
|
|
full_media_path = '' |
|
|
|
if not os.path.exists(test_path): |
|
|
|
return f'<div class="media-missing">Media not found: {html.escape(test_path)}</div>' |
|
|
|
|
|
|
|
# Sometimes the path is nested inside a subdirectory within the main Media folder |
|
|
|
if not os.path.exists(full_media_path): |
|
|
|
full_media_path = os.path.join(media_root_dir, media_path) |
|
|
|
|
|
|
|
if not os.path.exists(full_media_path): |
|
|
|
return f'<div class="media-missing">Media not found: {html.escape(media_path)}</div>' |
|
|
|
|
|
|
|
# Create a unique-ish path to avoid filename collisions |
|
|
|
relative_media_path = os.path.join('media', os.path.basename(media_path)) |
|
|
|
dest_path = os.path.join(output_dir, relative_media_path) |
|
|
|
|
|
|
|
os.makedirs(os.path.dirname(dest_path), exist_ok=True) |
|
|
|
|
|
|
|
if not os.path.exists(dest_path): |
|
|
|
try: |
|
|
|
shutil.copy(full_media_path, dest_path) |
|
|
|
except Exception as e: |
|
|
|
return f'<div class="media-missing">Error copying media: {html.escape(str(e))}</div>' |
|
|
|
|
|
|
|
full_media_path = os.path.join('Message', media_path) |
|
|
|
# remove ./ in the beginning if present |
|
|
|
full_media_path = full_media_path.lstrip('./') |
|
|
|
|
|
|
|
ext = os.path.splitext(media_path)[1].lower() |
|
|
|
|
|
|
|
if ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp']: |
|
|
|
return f'<img src="{relative_media_path}" alt="Image" class="media-item">' |
|
|
|
return f'<img src="../{full_media_path}" loading="lazy" alt="Image" class="media-item">' |
|
|
|
elif ext in ['.mp4', '.mov', '.webm']: |
|
|
|
return f'<video controls src="{relative_media_path}" class="media-item"></video>' |
|
|
|
return f'<video controls src="../{full_media_path}" class="media-item" loading="lazy"></video>' |
|
|
|
elif ext in ['.mp3', '.ogg', '.opus', '.m4a']: |
|
|
|
return f'<audio controls src="{relative_media_path}"></audio>' |
|
|
|
return f'<audio controls src="../{full_media_path}" loading="lazy"></audio>' |
|
|
|
else: |
|
|
|
return f'<a href="{relative_media_path}" target="_blank">View Media: {os.path.basename(media_path)}</a>' |
|
|
|
return f'<a href="../{full_media_path}" target="_blank">View Media: {os.path.basename(media_path)}</a>' |
|
|
|
|
|
|
|
def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_group): |
|
|
|
def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_group, contact_jid): |
|
|
|
"""Generates an HTML file for a single chat session.""" |
|
|
|
conn = sqlite3.connect(db_path) |
|
|
|
cursor = conn.cursor() |
|
|
@@ -82,7 +69,8 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g |
|
|
|
g.ZCONTACTNAME AS GroupMemberContactName, |
|
|
|
cs.ZPARTNERNAME AS ChatPartnerName, |
|
|
|
p.ZPUSHNAME AS ProfilePushName, |
|
|
|
mi.ZMEDIALOCALPATH |
|
|
|
mi.ZMEDIALOCALPATH, |
|
|
|
cs.ZCONTACTJID AS ChatJID |
|
|
|
FROM |
|
|
|
ZWAMESSAGE m |
|
|
|
LEFT JOIN |
|
|
@@ -107,36 +95,42 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g |
|
|
|
print(f"No messages found for chat: {chat_name}") |
|
|
|
return |
|
|
|
|
|
|
|
# Sanitize chat name for filename, allowing emojis |
|
|
|
safe_filename = "".join(c for c in chat_name if ( |
|
|
|
c.isalnum() or |
|
|
|
c in (' ', '-') or |
|
|
|
'\U0001F300' <= c <= '\U0001FAFF' # Unicode range for most emojis |
|
|
|
)).rstrip() |
|
|
|
# Sanitize contact_jid for a unique and safe filename |
|
|
|
if contact_jid: |
|
|
|
safe_filename = "".join(c if c.isalnum() else "_" for c in contact_jid) |
|
|
|
else: |
|
|
|
# Fallback to chat_id if contact_jid is not available |
|
|
|
safe_filename = str(chat_id) |
|
|
|
|
|
|
|
chats_dir = os.path.join(output_dir, "chats") |
|
|
|
os.makedirs(chats_dir, exist_ok=True) |
|
|
|
html_filename = os.path.join(chats_dir, f"{safe_filename}.html") |
|
|
|
|
|
|
|
with open(html_filename, 'w', encoding='utf-8') as f: |
|
|
|
f.write(f""" |
|
|
|
<!DOCTYPE html> |
|
|
|
<html lang="en"> |
|
|
|
<head> |
|
|
|
<meta charset="UTF-8"> |
|
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
|
|
<title>Chat with {html.escape(chat_name)}</title> |
|
|
|
<style> |
|
|
|
<!DOCTYPE html> |
|
|
|
<html lang="en"> |
|
|
|
<head> |
|
|
|
<meta charset="UTF-8"> |
|
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
|
|
<title>Chat with {html.escape(chat_name)}</title> |
|
|
|
<style> |
|
|
|
body {{ |
|
|
|
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; |
|
|
|
background-color: #e5ddd5; |
|
|
|
margin: 0; |
|
|
|
padding: 20px; |
|
|
|
color: #111b21; |
|
|
|
min-height: 100vh; |
|
|
|
box-sizing: border-box; |
|
|
|
}} |
|
|
|
.chat-container {{ |
|
|
|
max-width: 800px; |
|
|
|
margin: auto; |
|
|
|
background-image: url('https://user-images.githubusercontent.com/15075759/28719144-86dc0f70-73b1-11e7-911d-60d70fcded21.png'); /* Subtle background pattern */ |
|
|
|
background-image: url('../current_wallpaper.jpg'); |
|
|
|
background-size: auto 100%; |
|
|
|
background-attachment: fixed; |
|
|
|
background-position: center; |
|
|
|
border-radius: 8px; |
|
|
|
box-shadow: 0 1px 1px 0 rgba(0,0,0,0.06), 0 2px 5px 0 rgba(0,0,0,0.06); |
|
|
|
overflow: hidden; |
|
|
@@ -148,6 +142,14 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g |
|
|
|
font-size: 1.2em; |
|
|
|
text-align: center; |
|
|
|
}} |
|
|
|
|
|
|
|
|
|
|
|
.chat-header-id {{ |
|
|
|
font-size: 0.7em; |
|
|
|
opacity: 0.8; |
|
|
|
margin-top: 5px; |
|
|
|
}} |
|
|
|
|
|
|
|
.chat-box {{ |
|
|
|
padding: 20px; |
|
|
|
display: flex; |
|
|
@@ -197,16 +199,19 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g |
|
|
|
padding: 10px; |
|
|
|
border-radius: 8px; |
|
|
|
}} |
|
|
|
</style> |
|
|
|
</head> |
|
|
|
<body> |
|
|
|
<div class="chat-container"> |
|
|
|
<div class="chat-header">{html.escape(chat_name)}</div> |
|
|
|
</style> |
|
|
|
</head> |
|
|
|
<body> |
|
|
|
<div class="chat-container"> |
|
|
|
<div class="chat-header"> |
|
|
|
{html.escape(chat_name)} |
|
|
|
<div class="chat-header-id">{contact_jid}</div> |
|
|
|
</div> |
|
|
|
<div class="chat-box"> |
|
|
|
""") |
|
|
|
|
|
|
|
# Write messages |
|
|
|
for is_from_me, text, timestamp, from_jid, group_member_contact_name, chat_partner_name, profile_push_name, media_local_path in messages: |
|
|
|
for is_from_me, text, timestamp, from_jid, group_member_contact_name, chat_partner_name, profile_push_name, media_local_path, contact_jid in messages: |
|
|
|
msg_class = "sent" if is_from_me else "received" |
|
|
|
|
|
|
|
f.write(f'<div class="message {msg_class}">') |
|
|
@@ -241,8 +246,9 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g |
|
|
|
f.write(f'<div>{escaped_text.replace(chr(10), "<br>")}</div>') |
|
|
|
|
|
|
|
if media_local_path: |
|
|
|
f.write(get_media_tag(media_local_path, media_path, output_dir)) |
|
|
|
|
|
|
|
# print("Media path:", media_local_path) |
|
|
|
f.write(get_media_tag(media_local_path, output_dir)) |
|
|
|
|
|
|
|
f.write(f'<div class="timestamp">{convert_whatsapp_timestamp(timestamp)}</div>') |
|
|
|
f.write('</div>') |
|
|
|
|
|
|
@@ -256,25 +262,66 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g |
|
|
|
print(f"Successfully generated HTML for: {chat_name}") |
|
|
|
|
|
|
|
|
|
|
|
# Step: iPhone backup manifest.db processing |
|
|
|
def process_iphone_backup(backup_path, output_dir):
    """
    Recreate WhatsApp's shared-container files from an iPhone backup.

    Reads ``Manifest.db`` inside ``backup_path``, selects every row of its
    ``Files`` table whose domain is the WhatsApp shared app group, and copies
    the backing blob of each row (looked up at
    ``<backup_path>/<first two characters of fileID>/<fileID>``) to
    ``<output_dir>/<relativePath>``.

    Args:
        backup_path: Directory of the iPhone backup; must contain ``Manifest.db``.
        output_dir: Directory under which the original file tree is recreated.

    Returns:
        None. If ``Manifest.db`` is absent a message is printed and nothing is
        copied. Per-file copy failures are printed and the loop continues.

    Raises:
        sqlite3.Error: If ``Manifest.db`` cannot be opened or queried.
    """
    manifest_db_path = os.path.join(backup_path, 'Manifest.db')
    if not os.path.exists(manifest_db_path):
        print(f"Manifest.db not found in backup path: {manifest_db_path}")
        return

    # Connect to manifest.db and extract WhatsApp shared files.
    conn = sqlite3.connect(manifest_db_path)
    try:
        cursor = conn.cursor()
        cursor.execute(
            "SELECT fileID, domain, relativePath FROM Files WHERE domain = ?",
            ('AppDomainGroup-group.net.whatsapp.WhatsApp.shared',),
        )
        files = cursor.fetchall()
    finally:
        # Release the database handle even when the query fails.
        conn.close()

    print(f"Found {len(files)} WhatsApp shared files in manifest.db.")

    # Used to verify that every destination stays inside output_dir.
    output_root = os.path.abspath(output_dir)

    # Recreate the file structure.
    for file_id, _domain, relative_path in files:
        if not file_id or not relative_path:
            # A row without a file ID or a relative path cannot be mapped to a
            # source blob and a destination file.
            continue

        dest_file = os.path.join(output_dir, relative_path)

        # relativePath comes from the backup; an absolute path or one with
        # '..' components would otherwise be written outside output_dir.
        try:
            inside_output = os.path.commonpath(
                [output_root, os.path.abspath(dest_file)]
            ) == output_root
        except ValueError:
            # Raised e.g. when the two paths are on different drives.
            inside_output = False
        if not inside_output:
            print(f"Skipping unsafe path outside output directory: {relative_path}")
            continue

        src_file = os.path.join(backup_path, file_id[:2], file_id)
        if not os.path.exists(src_file):
            print(f"Source file missing: {src_file}")
            continue

        if os.path.exists(dest_file):
            # Already extracted by an earlier run; keep the existing copy.
            continue

        try:
            # Create the parent directory only for files that are really copied,
            # and treat a failure here like any other per-file copy error.
            os.makedirs(os.path.dirname(dest_file), exist_ok=True)
            shutil.copy2(src_file, dest_file)
        except OSError as e:
            print(f"Error copying {src_file} to {dest_file}: {e}")
|
|
|
|
|
|
|
|
|
|
|
def main(): |
|
|
|
parser = argparse.ArgumentParser(description="WhatsApp Chat Exporter") |
|
|
|
parser.add_argument("db_path", help="Path to the ChatStorage.sqlite file.") |
|
|
|
parser.add_argument("media_path", help="Path to the root 'Media' directory.") |
|
|
|
parser.add_argument("--output", default="_html_export", help="Directory to save the HTML files.") |
|
|
|
|
|
|
|
parser.add_argument("--backup-path", default=None, help="Path to iPhone backup directory (for manifest.db processing)") |
|
|
|
args = parser.parse_args() |
|
|
|
|
|
|
|
if not os.path.exists(args.db_path): |
|
|
|
print(f"Error: Database file not found at '{args.db_path}'") |
|
|
|
if args.backup_path: |
|
|
|
process_iphone_backup(args.backup_path, args.output) |
|
|
|
# Use backup paths for archive creation |
|
|
|
db_path = os.path.join(args.output, "ChatStorage.sqlite") |
|
|
|
media_path = os.path.join(args.output, "Message/") |
|
|
|
else: |
|
|
|
parser.add_argument("db_path", help="Path to the ChatStorage.sqlite file.") |
|
|
|
parser.add_argument("media_path", help="Path to the root 'Media' directory.") |
|
|
|
args = parser.parse_args() |
|
|
|
db_path = args.db_path |
|
|
|
media_path = args.media_path |
|
|
|
|
|
|
|
if not os.path.exists(db_path): |
|
|
|
print(f"Error: Database file not found at '{db_path}'") |
|
|
|
return |
|
|
|
|
|
|
|
if not os.path.exists(args.media_path): |
|
|
|
print(f"Error: Media directory not found at '{args.media_path}'") |
|
|
|
if not os.path.exists(media_path): |
|
|
|
print(f"Error: Media directory not found at '{media_path}'") |
|
|
|
return |
|
|
|
|
|
|
|
os.makedirs(args.output, exist_ok=True) |
|
|
|
|
|
|
|
conn = sqlite3.connect(args.db_path) |
|
|
|
conn = sqlite3.connect(db_path) |
|
|
|
cursor = conn.cursor() |
|
|
|
|
|
|
|
# Get all chats, joining with ZWAPROFILEPUSHNAME and using COALESCE to get the best possible name. |
|
|
@@ -285,15 +332,22 @@ def main(): |
|
|
|
cs.ZCONTACTJID, |
|
|
|
cs.ZMESSAGECOUNTER, |
|
|
|
MIN(m.ZMESSAGEDATE) as FirstMessageDate, |
|
|
|
MAX(m.ZMESSAGEDATE) as LastMessageDate |
|
|
|
MAX(m.ZMESSAGEDATE) as LastMessageDate, |
|
|
|
COALESCE(gi.ZPICTUREPATH, pic.ZPATH) AS AvatarPath |
|
|
|
FROM |
|
|
|
ZWACHATSESSION cs |
|
|
|
LEFT JOIN |
|
|
|
ZWAPROFILEPUSHNAME p ON cs.ZCONTACTJID = p.ZJID |
|
|
|
LEFT JOIN |
|
|
|
ZWAMESSAGE m ON cs.Z_PK = m.ZCHATSESSION |
|
|
|
LEFT JOIN |
|
|
|
ZWAGROUPINFO gi ON cs.ZGROUPINFO = gi.Z_PK |
|
|
|
LEFT JOIN |
|
|
|
ZWAPROFILEPICTUREITEM pic ON cs.ZCONTACTJID = pic.ZJID |
|
|
|
WHERE |
|
|
|
cs.ZCONTACTJID NOT LIKE '%@status' |
|
|
|
GROUP BY |
|
|
|
cs.Z_PK, ChatName, cs.ZCONTACTJID, cs.ZMESSAGECOUNTER |
|
|
|
cs.Z_PK, ChatName, cs.ZCONTACTJID, cs.ZMESSAGECOUNTER, AvatarPath |
|
|
|
ORDER BY |
|
|
|
LastMessageDate DESC NULLS LAST, ChatName |
|
|
|
""") |
|
|
@@ -426,22 +480,40 @@ def main(): |
|
|
|
<div class="container"> |
|
|
|
<ul> |
|
|
|
""") |
|
|
|
for chat_id, chat_name, contact_jid, message_count, first_message_date, last_message_date in chats: |
|
|
|
for chat_id, chat_name, contact_jid, message_count, first_message_date, last_message_date, avatar_path in chats: |
|
|
|
if not chat_name: |
|
|
|
chat_name = f"Unknown Chat ({contact_jid or chat_id})" |
|
|
|
full_avatar_path = avatar_path if avatar_path and os.path.isabs(avatar_path) else os.path.join(args.output, avatar_path) if avatar_path else None |
|
|
|
|
|
|
|
# Find all file paths in args.output that start with full_avatar_path |
|
|
|
matching_files = [] |
|
|
|
if full_avatar_path: |
|
|
|
for root, dirs, files in os.walk(args.output): |
|
|
|
for file in files: |
|
|
|
file_path = os.path.join(root, file) |
|
|
|
if file_path.startswith(full_avatar_path): |
|
|
|
matching_files.append(file_path) |
|
|
|
|
|
|
|
# Use the first matching file if available |
|
|
|
if matching_files: |
|
|
|
avatar_path = os.path.relpath(matching_files[0], args.output) |
|
|
|
full_avatar_path = matching_files[0] |
|
|
|
|
|
|
|
# A group chat JID typically ends with '@g.us' |
|
|
|
is_group = contact_jid and '@g.us' in contact_jid |
|
|
|
|
|
|
|
# Allow alphanumeric, spaces, hyphens, and emojis in filename |
|
|
|
safe_filename = "".join(c for c in chat_name if ( |
|
|
|
c.isalnum() or |
|
|
|
c in (' ', '-') or |
|
|
|
'\U0001F300' <= c <= '\U0001FAFF' # Unicode range for most emojis |
|
|
|
)).rstrip() |
|
|
|
# Sanitize contact_jid for a unique and safe filename |
|
|
|
if contact_jid: |
|
|
|
safe_filename = "".join(c if c.isalnum() else "_" for c in contact_jid) |
|
|
|
else: |
|
|
|
# Fallback to chat_id if contact_jid is not available |
|
|
|
safe_filename = str(chat_id) |
|
|
|
|
|
|
|
# Add default avatar based on chat type |
|
|
|
avatar_html = f'<div class="chat-avatar default-{"group" if is_group else "individual"}"></div>' |
|
|
|
if avatar_path and os.path.exists(full_avatar_path): |
|
|
|
avatar_html = f'<div class="chat-avatar" style="background-image: url(\'{avatar_path}\');"></div>' |
|
|
|
else: |
|
|
|
avatar_html = f'<div class="chat-avatar default-{"group" if is_group else "individual"}"></div>' |
|
|
|
|
|
|
|
# Format date range |
|
|
|
date_range = "" |
|
|
@@ -455,8 +527,8 @@ def main(): |
|
|
|
|
|
|
|
if message_count > 0: |
|
|
|
# Generate chat HTML only for chats with messages |
|
|
|
generate_html_chat(args.db_path, args.media_path, args.output, chat_id, chat_name, is_group) |
|
|
|
|
|
|
|
generate_html_chat(db_path, media_path, args.output, chat_id, chat_name, is_group, contact_jid) |
|
|
|
|
|
|
|
# Clickable entry with link |
|
|
|
index_f.write( |
|
|
|
f'<li><a class="chat-entry" href="chats/{html.escape(safe_filename)}.html">' |