From 8fc33808b12151fb1094a0523bc462b786153c81 Mon Sep 17 00:00:00 2001 From: Andreas Demmelbauer Date: Sun, 7 Sep 2025 00:42:30 +0200 Subject: [PATCH] add backup as input, various other changes --- .gitignore | 5 +- README.md | 10 +- whatsapp_viewer.py => whatsapp_exporter.py | 212 ++++++++++++++------- 3 files changed, 149 insertions(+), 78 deletions(-) rename whatsapp_viewer.py => whatsapp_exporter.py (70%) diff --git a/.gitignore b/.gitignore index e0924f4..d5cb1a8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ _html_export/ Messages/ -ChatStorage.sqlite \ No newline at end of file +iphone-backup/ +output/ +_html_export/ +*.sqlite \ No newline at end of file diff --git a/README.md b/README.md index 449d943..d7d10ab 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,6 @@ -For generating the HTML Archive, you need following: -* `Messages` directory - Containig all Media Files (e. g. from WhatsApp Backup) -* `ChatStorage.sqlite` - The Database containing all Chats (e. g. from iPhone Backup) +You need an unencrypted iOS backup -Place them next to the Script. - -Then run: +run: ``` -python3 whatsapp_viewer.py ChatStorage.sqlite Messages +python3 whatsapp_exporter.py --backup-path=./iphone-backup/46de1f4ca4a30b155985910d009edaf586236798/ --output=./output/ ``` \ No newline at end of file diff --git a/whatsapp_viewer.py b/whatsapp_exporter.py similarity index 70% rename from whatsapp_viewer.py rename to whatsapp_exporter.py index 7318c1a..d7413fa 100644 --- a/whatsapp_viewer.py +++ b/whatsapp_exporter.py @@ -1,7 +1,7 @@ # WhatsApp Chat Viewer # # This script reads a WhatsApp ChatStorage.sqlite database and associated media files -# to generate a browsable HTML representation of your chats. +# to generate a browsable HTML archive of chat conversations. 
# # Author: Gemini # Date: September 7, 2025 @@ -28,46 +28,33 @@ def convert_whatsapp_timestamp(ts): except (ValueError, TypeError): return "Invalid date" -def get_media_tag(media_path, media_root_dir, output_dir): +def get_media_tag(media_path, output_dir): """Generates the appropriate HTML tag for a given media file and copies it.""" if not media_path: return "" # Path in the DB is often relative like 'Media/WhatsApp Images/IMG-...' - full_media_path = os.path.join(media_root_dir, os.path.basename(media_path)) + test_path = os.path.join(output_dir, 'Message', media_path) + full_media_path = '' + if not os.path.exists(test_path): + return f'
Media not found: {html.escape(test_path)}
' - # Sometimes the path is nested inside a subdirectory within the main Media folder - if not os.path.exists(full_media_path): - full_media_path = os.path.join(media_root_dir, media_path) - - if not os.path.exists(full_media_path): - return f'
Media not found: {html.escape(media_path)}
' - - # Create a unique-ish path to avoid filename collisions - relative_media_path = os.path.join('media', os.path.basename(media_path)) - dest_path = os.path.join(output_dir, relative_media_path) - - os.makedirs(os.path.dirname(dest_path), exist_ok=True) - - if not os.path.exists(dest_path): - try: - shutil.copy(full_media_path, dest_path) - except Exception as e: - return f'
Error copying media: {html.escape(str(e))}
' - + full_media_path = os.path.join('Message', media_path) + # remove ./ in the beginning if present + full_media_path = full_media_path.lstrip('./') ext = os.path.splitext(media_path)[1].lower() if ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp']: - return f'Image' + return f'Image' elif ext in ['.mp4', '.mov', '.webm']: - return f'' + return f'' elif ext in ['.mp3', '.ogg', '.opus', '.m4a']: - return f'' + return f'' else: - return f'View Media: {os.path.basename(media_path)}' + return f'View Media: {os.path.basename(media_path)}' -def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_group): +def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_group, contact_jid): """Generates an HTML file for a single chat session.""" conn = sqlite3.connect(db_path) cursor = conn.cursor() @@ -82,7 +69,8 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g g.ZCONTACTNAME AS GroupMemberContactName, cs.ZPARTNERNAME AS ChatPartnerName, p.ZPUSHNAME AS ProfilePushName, - mi.ZMEDIALOCALPATH + mi.ZMEDIALOCALPATH, + cs.ZCONTACTJID AS ChatJID FROM ZWAMESSAGE m LEFT JOIN @@ -107,36 +95,42 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g print(f"No messages found for chat: {chat_name}") return - # Sanitize chat name for filename, allowing emojis - safe_filename = "".join(c for c in chat_name if ( - c.isalnum() or - c in (' ', '-') or - '\U0001F300' <= c <= '\U0001FAFF' # Unicode range for most emojis - )).rstrip() + # Sanitize contact_jid for a unique and safe filename + if contact_jid: + safe_filename = "".join(c if c.isalnum() else "_" for c in contact_jid) + else: + # Fallback to chat_id if contact_jid is not available + safe_filename = str(chat_id) + chats_dir = os.path.join(output_dir, "chats") os.makedirs(chats_dir, exist_ok=True) html_filename = os.path.join(chats_dir, f"{safe_filename}.html") with open(html_filename, 'w', encoding='utf-8') as f: f.write(f""" - - - 
- - - Chat with {html.escape(chat_name)} - - - -
-
{html.escape(chat_name)}
+ + + +
+
+ {html.escape(chat_name)} +
{contact_jid}
+
""") # Write messages - for is_from_me, text, timestamp, from_jid, group_member_contact_name, chat_partner_name, profile_push_name, media_local_path in messages: + for is_from_me, text, timestamp, from_jid, group_member_contact_name, chat_partner_name, profile_push_name, media_local_path, contact_jid in messages: msg_class = "sent" if is_from_me else "received" f.write(f'
') @@ -241,8 +246,9 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g f.write(f'
{escaped_text.replace(chr(10), "
")}
') if media_local_path: - f.write(get_media_tag(media_local_path, media_path, output_dir)) - + # print("Media path:", media_local_path) + f.write(get_media_tag(media_local_path, output_dir)) + f.write(f'
{convert_whatsapp_timestamp(timestamp)}
') f.write('
') @@ -256,25 +262,66 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g print(f"Successfully generated HTML for: {chat_name}") +# Step: iPhone backup manifest.db processing +def process_iphone_backup(backup_path, output_dir): + """ + Processes the iPhone backup manifest.db, extracts WhatsApp shared files, and recreates the file structure in output_dir. + """ + manifest_db_path = os.path.join(backup_path, 'Manifest.db') + if not os.path.exists(manifest_db_path): + print(f"Manifest.db not found in backup path: {manifest_db_path}") + return + + # Connect to manifest.db and extract WhatsApp shared files + conn = sqlite3.connect(manifest_db_path) + cursor = conn.cursor() + cursor.execute("SELECT fileID, domain, relativePath FROM Files WHERE domain = ?", ('AppDomainGroup-group.net.whatsapp.WhatsApp.shared',)) + files = cursor.fetchall() + print(f"Found {len(files)} WhatsApp shared files in manifest.db.") + # Prepare to recreate file structure + for fileID, domain, relativePath in files: + src_file = os.path.join(backup_path, fileID[:2], fileID) + dest_file = os.path.join(output_dir, relativePath) + os.makedirs(os.path.dirname(dest_file), exist_ok=True) + if os.path.exists(src_file): + if not os.path.exists(dest_file): + try: + shutil.copy2(src_file, dest_file) + except Exception as e: + print(f"Error copying {src_file} to {dest_file}: {e}") + else: + print(f"Source file missing: {src_file}") + + def main(): parser = argparse.ArgumentParser(description="WhatsApp Chat Exporter") - parser.add_argument("db_path", help="Path to the ChatStorage.sqlite file.") - parser.add_argument("media_path", help="Path to the root 'Media' directory.") parser.add_argument("--output", default="_html_export", help="Directory to save the HTML files.") - + parser.add_argument("--backup-path", default=None, help="Path to iPhone backup directory (for manifest.db processing)") args = parser.parse_args() - if not os.path.exists(args.db_path): - print(f"Error: 
Database file not found at '{args.db_path}'") + if args.backup_path: + process_iphone_backup(args.backup_path, args.output) + # Use backup paths for archive creation + db_path = os.path.join(args.output, "ChatStorage.sqlite") + media_path = os.path.join(args.output, "Message/") + else: + parser.add_argument("db_path", help="Path to the ChatStorage.sqlite file.") + parser.add_argument("media_path", help="Path to the root 'Media' directory.") + args = parser.parse_args() + db_path = args.db_path + media_path = args.media_path + + if not os.path.exists(db_path): + print(f"Error: Database file not found at '{db_path}'") return - if not os.path.exists(args.media_path): - print(f"Error: Media directory not found at '{args.media_path}'") + if not os.path.exists(media_path): + print(f"Error: Media directory not found at '{media_path}'") return os.makedirs(args.output, exist_ok=True) - conn = sqlite3.connect(args.db_path) + conn = sqlite3.connect(db_path) cursor = conn.cursor() # Get all chats, joining with ZWAPROFILEPUSHNAME and using COALESCE to get the best possible name. @@ -285,15 +332,22 @@ def main(): cs.ZCONTACTJID, cs.ZMESSAGECOUNTER, MIN(m.ZMESSAGEDATE) as FirstMessageDate, - MAX(m.ZMESSAGEDATE) as LastMessageDate + MAX(m.ZMESSAGEDATE) as LastMessageDate, + COALESCE(gi.ZPICTUREPATH, pic.ZPATH) AS AvatarPath FROM ZWACHATSESSION cs LEFT JOIN ZWAPROFILEPUSHNAME p ON cs.ZCONTACTJID = p.ZJID LEFT JOIN ZWAMESSAGE m ON cs.Z_PK = m.ZCHATSESSION + LEFT JOIN + ZWAGROUPINFO gi ON cs.ZGROUPINFO = gi.Z_PK + LEFT JOIN + ZWAPROFILEPICTUREITEM pic ON cs.ZCONTACTJID = pic.ZJID + WHERE + cs.ZCONTACTJID NOT LIKE '%@status' GROUP BY - cs.Z_PK, ChatName, cs.ZCONTACTJID, cs.ZMESSAGECOUNTER + cs.Z_PK, ChatName, cs.ZCONTACTJID, cs.ZMESSAGECOUNTER, AvatarPath ORDER BY LastMessageDate DESC NULLS LAST, ChatName """) @@ -426,22 +480,40 @@ def main():
    """) - for chat_id, chat_name, contact_jid, message_count, first_message_date, last_message_date in chats: + for chat_id, chat_name, contact_jid, message_count, first_message_date, last_message_date, avatar_path in chats: if not chat_name: chat_name = f"Unknown Chat ({contact_jid or chat_id})" + full_avatar_path = avatar_path if avatar_path and os.path.isabs(avatar_path) else os.path.join(args.output, avatar_path) if avatar_path else None + + # Find all file paths in args.output that start with full_avatar_path + matching_files = [] + if full_avatar_path: + for root, dirs, files in os.walk(args.output): + for file in files: + file_path = os.path.join(root, file) + if file_path.startswith(full_avatar_path): + matching_files.append(file_path) + + # Use the first matching file if available + if matching_files: + avatar_path = os.path.relpath(matching_files[0], args.output) + full_avatar_path = matching_files[0] # A group chat JID typically ends with '@g.us' is_group = contact_jid and '@g.us' in contact_jid - # Allow alphanumeric, spaces, hyphens, and emojis in filename - safe_filename = "".join(c for c in chat_name if ( - c.isalnum() or - c in (' ', '-') or - '\U0001F300' <= c <= '\U0001FAFF' # Unicode range for most emojis - )).rstrip() + # Sanitize contact_jid for a unique and safe filename + if contact_jid: + safe_filename = "".join(c if c.isalnum() else "_" for c in contact_jid) + else: + # Fallback to chat_id if contact_jid is not available + safe_filename = str(chat_id) # Add default avatar based on chat type - avatar_html = f'
    ' + if avatar_path and os.path.exists(full_avatar_path): + avatar_html = f'
    ' + else: + avatar_html = f'
    ' # Format date range date_range = "" @@ -455,8 +527,8 @@ def main(): if message_count > 0: # Generate chat HTML only for chats with messages - generate_html_chat(args.db_path, args.media_path, args.output, chat_id, chat_name, is_group) - + generate_html_chat(db_path, media_path, args.output, chat_id, chat_name, is_group, contact_jid) + # Clickable entry with link index_f.write( f'
  • '