From 5302708f73d130cc45e718b126d401dbd44369cd Mon Sep 17 00:00:00 2001 From: Andreas Demmelbauer Date: Mon, 8 Sep 2025 21:29:08 +0200 Subject: [PATCH] Add media gallery generation and improve HTML output for chats --- whatsapp_archiver.py | 741 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 721 insertions(+), 20 deletions(-) diff --git a/whatsapp_archiver.py b/whatsapp_archiver.py index 6763461..adfc269 100644 --- a/whatsapp_archiver.py +++ b/whatsapp_archiver.py @@ -22,6 +22,660 @@ def convert_whatsapp_timestamp(ts): except (ValueError, TypeError): return "Invalid date" +def get_media_tag_for_gallery(media_path, output_dir, base_path=""): + """Generates media HTML for gallery pages with proper relative paths.""" + if not media_path: + return "" + + test_path = os.path.join(output_dir, 'Message', media_path) + if not os.path.exists(test_path): + return "" + + full_media_path = os.path.join(base_path, 'Message', media_path) + full_media_path = full_media_path.lstrip('./') + full_media_path = f'../{full_media_path}' + + ext = os.path.splitext(media_path)[1].lower() + + if ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp']: + return f'Image' + elif ext in ['.mp4', '.mov', '.webm']: + return f'' + elif ext in ['.mp3', '.ogg', '.opus', '.m4a']: + return f'' + else: + return f'
📎 {os.path.basename(media_path)}
' + + +def generate_all_media_gallery(db_path, output_dir): + """Generates HTML pages showing all media files sorted by time with pagination.""" + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + # Get all media messages with chat and sender information + query = """ + SELECT + m.ZMESSAGEDATE, + mi.ZMEDIALOCALPATH, + m.ZISFROMME, + m.ZFROMJID, + cs.ZPARTNERNAME AS ChatName, + cs.ZCONTACTJID, + gm_p.ZPUSHNAME AS GroupMemberName, + p.ZPUSHNAME AS PushName, + cs.Z_PK as ChatID, + gm.ZMEMBERJID AS GroupMemberJID, + sender_cs.ZPARTNERNAME AS SenderPartnerName, + gm_fallback_p.ZPUSHNAME AS GroupMemberNameFallback + FROM + ZWAMESSAGE m + LEFT JOIN + ZWAMEDIAITEM mi ON m.ZMEDIAITEM = mi.Z_PK + LEFT JOIN + ZWACHATSESSION cs ON m.ZCHATSESSION = cs.Z_PK + LEFT JOIN + ZWAGROUPMEMBER gm ON gm.Z_PK = m.ZGROUPMEMBER + LEFT JOIN + ZWAPROFILEPUSHNAME gm_p ON gm.ZMEMBERJID = gm_p.ZJID + LEFT JOIN + ZWAPROFILEPUSHNAME p ON m.ZFROMJID = p.ZJID + LEFT JOIN + ZWACHATSESSION sender_cs ON sender_cs.ZCONTACTJID = m.ZFROMJID + LEFT JOIN + ZWAGROUPMEMBER gm_fallback ON gm_fallback.ZCHATSESSION = m.ZCHATSESSION AND gm_fallback.ZMEMBERJID = m.ZFROMJID + LEFT JOIN + ZWAPROFILEPUSHNAME gm_fallback_p ON gm_fallback.ZMEMBERJID = gm_fallback_p.ZJID + WHERE + mi.ZMEDIALOCALPATH IS NOT NULL + AND cs.ZCONTACTJID NOT LIKE '%@status' + ORDER BY + m.ZMESSAGEDATE DESC; + """ + + cursor.execute(query) + all_media_messages = cursor.fetchall() + conn.close() + + # Filter out messages without valid media paths + valid_media_messages = [] + for msg in all_media_messages: + if msg[1] and os.path.exists(os.path.join(output_dir, 'Message', msg[1])): + valid_media_messages.append(msg) + + total_media = len(valid_media_messages) + items_per_page = 120 # Show 120 media items per page + total_pages = (total_media + items_per_page - 1) // items_per_page + + if total_pages == 0: + total_pages = 1 + + # Generate each page + for page_num in range(1, total_pages + 1): + start_idx = (page_num - 1) * items_per_page + end_idx = min(start_idx + items_per_page, total_media) + page_media_messages = valid_media_messages[start_idx:end_idx] + + # Determine filename + if page_num == 1: + filename = "media-gallery.html" + else: + filename = f"media-gallery-page-{page_num}.html" + + media_gallery_path = os.path.join(output_dir, filename) + + with open(media_gallery_path, 'w', encoding='utf-8') as f: + f.write(f""" + + + + + + WhatsApp Media Gallery - Page {page_num} + + + +
+

📷 Media Gallery

+ +
+
+
+ Showing {start_idx + 1}-{end_idx} of {total_media} media files (Page {page_num} of {total_pages}) +
+ """) + + # Add pagination controls + f.write('') + + # Media grid + f.write('
') + + for message_date, media_path, is_from_me, from_jid, chat_name, contact_jid, group_member_name, push_name, chat_id, group_member_jid, sender_partner_name, group_member_name_fallback in page_media_messages: + if not media_path: + continue + + # Determine sender name + if is_from_me: + sender_name = "You" + else: + # For group messages, prioritize ZCONTACTNAME from ZWAGROUPMEMBER linked via ZGROUPMEMBER + sender_name = group_member_name or group_member_name_fallback # Try direct link first, then fallback via ZFROMJID + + if not sender_name: + # Try sender's partner name from their individual chat session + sender_name = sender_partner_name or push_name + + if not sender_name: + # Check if this is a group chat and ZFROMJID is the group JID (can't determine individual sender) + if '@g.us' in str(contact_jid or '') and from_jid and '@g.us' in from_jid: + sender_name = "Group Member" # Generic fallback for unidentifiable group messages + elif group_member_jid and '@' in group_member_jid: + phone_number = group_member_jid.split('@')[0] + sender_name = f"+{phone_number}" if phone_number.isdigit() else group_member_jid + elif from_jid and '@' in from_jid: + phone_number = from_jid.split('@')[0] + sender_name = f"+{phone_number}" if phone_number.isdigit() else from_jid + else: + sender_name = "Unknown" + + # Generate media HTML + media_html = get_media_tag_for_gallery(media_path, output_dir) + if not media_html: + continue + + # Sanitize contact_jid for filename + if contact_jid: + safe_filename = "".join(c if c.isalnum() else "_" for c in contact_jid) + else: + safe_filename = str(chat_id) + + f.write(f""" +
+
+ {html.escape(str(chat_name or "Unknown Chat"))} + • {html.escape(str(sender_name))} + {convert_whatsapp_timestamp(message_date)} +
+ + {media_html} + + 📁 Open File +
+ """) + + f.write('
') + + # Add pagination controls at bottom + f.write('') + + f.write(""" +
+ + + """) + + print(f"Generated {total_pages} media gallery pages with {total_media} total media files") + + +def generate_chat_media_gallery(db_path, output_dir, chat_id, chat_name, contact_jid): + """Generates an HTML page showing all media files for a specific chat.""" + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + # Get media messages for this specific chat + query = """ + SELECT + m.ZMESSAGEDATE, + mi.ZMEDIALOCALPATH, + m.ZISFROMME, + m.ZFROMJID, + gm_p.ZPUSHNAME AS GroupMemberName, + p.ZPUSHNAME AS PushName, + gm.ZMEMBERJID AS GroupMemberJID, + sender_cs.ZPARTNERNAME AS SenderPartnerName, + gm_fallback_p.ZPUSHNAME AS GroupMemberNameFallback + FROM + ZWAMESSAGE m + LEFT JOIN + ZWAMEDIAITEM mi ON m.ZMEDIAITEM = mi.Z_PK + LEFT JOIN + ZWAGROUPMEMBER gm ON gm.Z_PK = m.ZGROUPMEMBER + LEFT JOIN + ZWAPROFILEPUSHNAME gm_p ON gm.ZMEMBERJID = gm_p.ZJID + LEFT JOIN + ZWAPROFILEPUSHNAME p ON m.ZFROMJID = p.ZJID + LEFT JOIN + ZWACHATSESSION sender_cs ON sender_cs.ZCONTACTJID = m.ZFROMJID + LEFT JOIN + ZWAGROUPMEMBER gm_fallback ON gm_fallback.ZCHATSESSION = m.ZCHATSESSION AND gm_fallback.ZMEMBERJID = m.ZFROMJID + LEFT JOIN + ZWAPROFILEPUSHNAME gm_fallback_p ON gm_fallback.ZMEMBERJID = gm_fallback_p.ZJID + WHERE + m.ZCHATSESSION = ? + AND mi.ZMEDIALOCALPATH IS NOT NULL + ORDER BY + m.ZMESSAGEDATE DESC; + """ + + cursor.execute(query, (chat_id,)) + media_messages = cursor.fetchall() + conn.close() + + if not media_messages: + return # No media to display + + # Sanitize contact_jid for filename + if contact_jid: + safe_filename = "".join(c if c.isalnum() else "_" for c in contact_jid) + else: + safe_filename = str(chat_id) + + media_dir = os.path.join(output_dir, "media") + os.makedirs(media_dir, exist_ok=True) + media_gallery_path = os.path.join(media_dir, f"{safe_filename}.html") + + with open(media_gallery_path, 'w', encoding='utf-8') as f: + f.write(f""" + + + + + + Media from {html.escape(str(chat_name))} + + + +
+

📷 Media from {html.escape(str(chat_name))}

+ +
+
+

{len(media_messages)} media files in this chat, sorted by date (newest first).

+
+ """) + + for message_date, media_path, is_from_me, from_jid, group_member_name, push_name, group_member_jid, sender_partner_name, group_member_name_fallback in media_messages: + if not media_path: + continue + + # Determine sender name + if is_from_me: + sender_name = "You" + else: + # For group messages, prioritize ZCONTACTNAME from ZWAGROUPMEMBER linked via ZGROUPMEMBER + sender_name = group_member_name or group_member_name_fallback # Try direct link first, then fallback via ZFROMJID + + if not sender_name: + # Try sender's partner name from their individual chat session + sender_name = sender_partner_name or push_name + + if not sender_name: + # Check if this is a group chat and ZFROMJID is the group JID (can't determine individual sender) + if contact_jid and '@g.us' in contact_jid and from_jid and '@g.us' in from_jid: + sender_name = "Group Member" # Generic fallback for unidentifiable group messages + elif group_member_jid and '@' in group_member_jid: + phone_number = group_member_jid.split('@')[0] + sender_name = f"+{phone_number}" if phone_number.isdigit() else group_member_jid + elif from_jid and '@' in from_jid: + phone_number = from_jid.split('@')[0] + sender_name = f"+{phone_number}" if phone_number.isdigit() else from_jid + else: + sender_name = "Unknown" + + # Generate media HTML with proper relative path + media_html = get_media_tag_for_gallery(media_path, output_dir, "../") + if not media_html: + continue + + f.write(f""" +
+
+ {html.escape(str(sender_name))} + {convert_whatsapp_timestamp(message_date)} +
+ {media_html} + 📁 Open File +
+ """) + + f.write(""" +
+
+ + + """) + + print(f"Generated chat media gallery for: {chat_name}") + + def get_media_tag(media_path, output_dir): """Generates the appropriate HTML tag for a given media file and copies it.""" if not media_path: @@ -53,28 +707,39 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g conn = sqlite3.connect(db_path) cursor = conn.cursor() - # Updated query to fetch more potential name fields (like ZFIRSTNAME) to find the best one. + # Updated query to fetch more potential name fields and properly resolve group member names query = """ SELECT m.ZISFROMME, m.ZTEXT, m.ZMESSAGEDATE, m.ZFROMJID, - g.ZCONTACTNAME AS GroupMemberContactName, + gm_p.ZPUSHNAME AS GroupMemberContactName, cs.ZPARTNERNAME AS ChatPartnerName, p.ZPUSHNAME AS ProfilePushName, mi.ZMEDIALOCALPATH, - cs.ZCONTACTJID AS ChatJID + cs.ZCONTACTJID AS ChatJID, + gm.ZMEMBERJID AS GroupMemberJID, + sender_cs.ZPARTNERNAME AS SenderPartnerName, + gm_fallback_p.ZPUSHNAME AS GroupMemberContactNameFallback FROM ZWAMESSAGE m - LEFT JOIN - ZWAGROUPMEMBER g ON m.ZGROUPMEMBER = g.Z_PK LEFT JOIN ZWACHATSESSION cs ON m.ZCHATSESSION = cs.Z_PK + LEFT JOIN + ZWAGROUPMEMBER gm ON gm.Z_PK = m.ZGROUPMEMBER + LEFT JOIN + ZWAPROFILEPUSHNAME gm_p ON gm.ZMEMBERJID = gm_p.ZJID LEFT JOIN ZWAPROFILEPUSHNAME p ON m.ZFROMJID = p.ZJID LEFT JOIN ZWAMEDIAITEM mi ON m.ZMEDIAITEM = mi.Z_PK + LEFT JOIN + ZWACHATSESSION sender_cs ON sender_cs.ZCONTACTJID = m.ZFROMJID + LEFT JOIN + ZWAGROUPMEMBER gm_fallback ON gm_fallback.ZCHATSESSION = m.ZCHATSESSION AND gm_fallback.ZMEMBERJID = m.ZFROMJID + LEFT JOIN + ZWAPROFILEPUSHNAME gm_fallback_p ON gm_fallback.ZMEMBERJID = gm_fallback_p.ZJID WHERE m.ZCHATSESSION = ? ORDER BY @@ -136,6 +801,21 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g font-size: 1.2em; text-align: center; }} + .nav-links {{ + margin-top: 8px; + font-size: 0.8em; + }} + .nav-links a {{ + color: rgba(255,255,255,0.9); + text-decoration: none; + margin: 0 8px; + padding: 3px 8px; + border-radius: 3px; + transition: background-color 0.2s; + }} + .nav-links a:hover {{ + background-color: rgba(255,255,255,0.1); + }} .chat-header-id {{ @@ -200,12 +880,16 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g
{html.escape(chat_name)}
{contact_jid}
+
""") # Write messages - for is_from_me, text, timestamp, from_jid, group_member_contact_name, chat_partner_name, profile_push_name, media_local_path, contact_jid in messages: + for is_from_me, text, timestamp, from_jid, group_member_contact_name, chat_partner_name, profile_push_name, media_local_path, contact_jid, group_member_jid, sender_partner_name, group_member_contact_name_fallback in messages: msg_class = "sent" if is_from_me else "received" f.write(f'
') @@ -214,20 +898,25 @@ def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_g if not is_from_me: # Prioritize group member contact name for group chats if is_group: - # Try names in order of preference, avoiding encoded-looking strings - potential_names = [ - group_member_contact_name, - profile_push_name, - from_jid, - chat_partner_name, - ] + # For group messages, prioritize ZCONTACTNAME from ZWAGROUPMEMBER linked via ZGROUPMEMBER + sender_name = group_member_contact_name or group_member_contact_name_fallback # Try direct link first, then fallback via ZFROMJID - # Filter out None values and strings that look like they're encoded - valid_names = [name for name in potential_names if name and not ( - name.startswith('CK') and any(c.isupper() for c in name[2:]) and '=' in name - )] - - sender_name = next((name for name in valid_names), "Unknown") + if not sender_name: + # Try sender's partner name from their individual chat session + sender_name = sender_partner_name or profile_push_name + + if not sender_name: + # Check if this is a group chat and ZFROMJID is the group JID (can't determine individual sender) + if contact_jid and '@g.us' in contact_jid and from_jid and '@g.us' in from_jid: + sender_name = "Group Member" # Generic fallback for unidentifiable group messages + elif group_member_jid and '@' in group_member_jid: + phone_number = group_member_jid.split('@')[0] + sender_name = f"+{phone_number}" if phone_number.isdigit() else group_member_jid + elif from_jid and '@' in from_jid: + phone_number = from_jid.split('@')[0] + sender_name = f"+{phone_number}" if phone_number.isdigit() else from_jid + else: + sender_name = "Unknown" else: # For individual chats, prefer partner name or push name sender_name = chat_partner_name or profile_push_name or from_jid or "Unknown" @@ -298,7 +987,7 @@ def process_iphone_backup(backup_path, output_dir): os.makedirs(os.path.dirname(dest_file), exist_ok=True) if not os.path.exists(src_file): - print(f"Source file missing: {src_file}") + # print(f"Source file missing: {src_file}") skipped_files += 1 continue @@ -668,6 +1357,9 @@ def main():

WhatsApp Chat Export

Exported on {datetime.now().strftime('%Y-%m-%d %H:%M')}
+
+ 📷 View All Media +
    @@ -720,6 +1412,9 @@ def main(): if message_count > 0: # Generate chat HTML only for chats with messages generate_html_chat(db_path, media_path, args.output, chat_id, chat_name, is_group, contact_jid) + + # Generate individual chat media gallery + generate_chat_media_gallery(db_path, args.output, chat_id, chat_name, contact_jid) # Clickable entry with link index_f.write( @@ -747,6 +1442,9 @@ def main(): index_f.write("
") + # Generate the all-media gallery + generate_all_media_gallery(db_path, args.output) + # Create a simple redirect index.html redirect_index = os.path.join(args.output, "index.html") with open(redirect_index, 'w', encoding='utf-8') as f: @@ -765,6 +1463,9 @@ def main(): print(f"View your chats by opening either of these files in your browser:") print(f" • {os.path.abspath(index_path)}") print(f" • {os.path.abspath(redirect_index)}") + print(f"\nAdditional features:") + print(f" • Media Gallery: {os.path.abspath(os.path.join(args.output, 'media-gallery.html'))}") + print(f" • Individual chat media galleries available in the media/ folder") if __name__ == "__main__":