|
- # WhatsApp Chat Viewer
- #
- # This script reads a WhatsApp ChatStorage.sqlite database and associated media files
- # to generate a browsable HTML representation of your chats.
- #
- # Author: Gemini
- # Date: September 7, 2025
- # Version: 1.3 - Improved name resolution to avoid displaying encoded strings.
-
- import sqlite3
- import os
- import argparse
- import html
- from datetime import datetime, timedelta
- import shutil
-
# WhatsApp on iOS stores dates as Core Data timestamps: seconds elapsed
# since 2001-01-01 00:00:00.
WHATSAPP_EPOCH = datetime(2001, 1, 1)


def convert_whatsapp_timestamp(ts):
    """Convert a WhatsApp (Core Data) timestamp to ``YYYY-MM-DD HH:MM:SS``.

    Args:
        ts: Seconds since ``WHATSAPP_EPOCH`` (int or float), or a falsy
            value (``None``, ``0``) when no date is available.

    Returns:
        The formatted date string; ``""`` when ``ts`` is falsy; and
        ``"Invalid date"`` when ``ts`` is not numeric or falls outside the
        range ``datetime`` can represent. No timezone conversion is applied.
    """
    if not ts:
        return ""
    try:
        moment = WHATSAPP_EPOCH + timedelta(seconds=ts)
        return moment.strftime('%Y-%m-%d %H:%M:%S')
    except (ValueError, TypeError, OverflowError):
        # OverflowError: timedelta() and datetime addition reject huge or
        # infinite values with it rather than with ValueError.
        return "Invalid date"
-
def get_media_tag(media_path, media_root_dir, output_dir, url_prefix="../"):
    """Copy a media file into the export and return the HTML that embeds it.

    The file is looked up under ``media_root_dir`` -- first by its bare file
    name, then by the full path recorded in the database -- and copied to
    ``<output_dir>/media/<file name>`` unless a file of that name is already
    there. Files are flattened by base name, so two different sources that
    share a name map to the same copy.

    Args:
        media_path: Media path as stored in the database (may be empty/None).
        media_root_dir: Directory that holds the WhatsApp media files.
        output_dir: Root directory of the HTML export.
        url_prefix: Prepended to ``media/<file name>`` in the generated URL.
            The default ``"../"`` suits the chat pages, which are written to
            ``<output_dir>/chats/`` -- one level below the copied media.

    Returns:
        ``""`` when ``media_path`` is empty; a ``media-missing`` ``<div>``
        when the file cannot be found or copied; otherwise an ``<img>``,
        ``<video>``, ``<audio>`` or ``<a>`` element chosen by file extension.
    """
    if not media_path:
        return ""

    # Local import keeps this function self-contained; the module header
    # does not import urllib.
    from urllib.parse import quote

    file_name = os.path.basename(media_path)

    # The DB path is often relative (e.g. 'Media/WhatsApp Images/IMG-...'),
    # so try the flat location first and the nested one second. isfile()
    # rather than exists(): a directory is not usable media.
    candidates = (
        os.path.join(media_root_dir, file_name),
        os.path.join(media_root_dir, media_path),
    )
    source_path = next((p for p in candidates if os.path.isfile(p)), None)
    if source_path is None:
        return f'<div class="media-missing">Media not found: {html.escape(media_path)}</div>'

    dest_path = os.path.join(output_dir, 'media', file_name)
    os.makedirs(os.path.dirname(dest_path), exist_ok=True)

    if not os.path.exists(dest_path):
        try:
            shutil.copy(source_path, dest_path)
        except OSError as e:
            return f'<div class="media-missing">Error copying media: {html.escape(str(e))}</div>'

    # Build the URL with '/' (not os.sep), percent-encode the file name and
    # HTML-escape the result so unusual names cannot break the markup.
    url = html.escape(f"{url_prefix}media/{quote(file_name)}")
    ext = os.path.splitext(file_name)[1].lower()

    if ext in ('.jpg', '.jpeg', '.png', '.gif', '.webp'):
        return f'<img src="{url}" alt="Image" class="media-item">'
    if ext in ('.mp4', '.mov', '.webm'):
        return f'<video controls src="{url}" class="media-item"></video>'
    if ext in ('.mp3', '.ogg', '.opus', '.m4a'):
        return f'<audio controls src="{url}"></audio>'
    return f'<a href="{url}" target="_blank">View Media: {html.escape(file_name)}</a>'
-
- def generate_html_chat(db_path, media_path, output_dir, chat_id, chat_name, is_group):
- """Generates an HTML file for a single chat session."""
- conn = sqlite3.connect(db_path)
- cursor = conn.cursor()
-
- # Updated query to fetch more potential name fields (like ZFIRSTNAME) to find the best one.
- query = """
- SELECT
- m.ZISFROMME,
- m.ZTEXT,
- m.ZMESSAGEDATE,
- m.ZFROMJID,
- g.ZCONTACTNAME AS GroupMemberContactName,
- cs.ZPARTNERNAME AS ChatPartnerName,
- p.ZPUSHNAME AS ProfilePushName,
- mi.ZMEDIALOCALPATH
- FROM
- ZWAMESSAGE m
- LEFT JOIN
- ZWAGROUPMEMBER g ON m.ZGROUPMEMBER = g.Z_PK
- LEFT JOIN
- ZWACHATSESSION cs ON m.ZCHATSESSION = cs.Z_PK
- LEFT JOIN
- ZWAPROFILEPUSHNAME p ON m.ZFROMJID = p.ZJID
- LEFT JOIN
- ZWAMEDIAITEM mi ON m.ZMEDIAITEM = mi.Z_PK
- WHERE
- m.ZCHATSESSION = ?
- ORDER BY
- m.ZMESSAGEDATE ASC;
- """
-
- cursor.execute(query, (chat_id,))
- messages = cursor.fetchall()
- conn.close()
-
- if not messages:
- print(f"No messages found for chat: {chat_name}")
- return
-
- # Sanitize chat name for filename, allowing emojis
- safe_filename = "".join(c for c in chat_name if (
- c.isalnum() or
- c in (' ', '-') or
- '\U0001F300' <= c <= '\U0001FAFF' # Unicode range for most emojis
- )).rstrip()
- chats_dir = os.path.join(output_dir, "chats")
- os.makedirs(chats_dir, exist_ok=True)
- html_filename = os.path.join(chats_dir, f"{safe_filename}.html")
-
- with open(html_filename, 'w', encoding='utf-8') as f:
- f.write(f"""
- <!DOCTYPE html>
- <html lang="en">
- <head>
- <meta charset="UTF-8">
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
- <title>Chat with {html.escape(chat_name)}</title>
- <style>
- body {{
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
- background-color: #e5ddd5;
- margin: 0;
- padding: 20px;
- color: #111b21;
- }}
- .chat-container {{
- max-width: 800px;
- margin: auto;
- background-image: url('https://user-images.githubusercontent.com/15075759/28719144-86dc0f70-73b1-11e7-911d-60d70fcded21.png'); /* Subtle background pattern */
- border-radius: 8px;
- box-shadow: 0 1px 1px 0 rgba(0,0,0,0.06), 0 2px 5px 0 rgba(0,0,0,0.06);
- overflow: hidden;
- }}
- .chat-header {{
- background-color: #008069;
- color: white;
- padding: 15px 20px;
- font-size: 1.2em;
- text-align: center;
- }}
- .chat-box {{
- padding: 20px;
- display: flex;
- flex-direction: column;
- gap: 12px;
- }}
- .message {{
- padding: 8px 12px;
- border-radius: 18px;
- max-width: 70%;
- word-wrap: break-word;
- position: relative;
- }}
- .message.sent {{
- background-color: #dcf8c6;
- align-self: flex-end;
- border-bottom-right-radius: 4px;
- }}
- .message.received {{
- background-color: #ffffff;
- align-self: flex-start;
- border-bottom-left-radius: 4px;
- }}
- .sender-name {{
- font-weight: bold;
- font-size: 0.9em;
- color: #005c4b;
- margin-bottom: 4px;
- }}
- .timestamp {{
- font-size: 0.75em;
- color: #667781;
- margin-top: 5px;
- text-align: right;
- }}
- .media-item {{
- max-width: 100%;
- border-radius: 8px;
- margin-top: 5px;
- display: block;
- }}
- .media-missing {{
- font-style: italic;
- color: #888;
- background-color: #fcebeb;
- border: 1px solid #f5c6cb;
- padding: 10px;
- border-radius: 8px;
- }}
- </style>
- </head>
- <body>
- <div class="chat-container">
- <div class="chat-header">{html.escape(chat_name)}</div>
- <div class="chat-box">
- """)
-
- # Write messages
- for is_from_me, text, timestamp, from_jid, group_member_contact_name, chat_partner_name, profile_push_name, media_local_path in messages:
- msg_class = "sent" if is_from_me else "received"
-
- f.write(f'<div class="message {msg_class}">')
-
- # Determine and display the sender's name for incoming messages
- if not is_from_me:
- # Prioritize group member contact name for group chats
- if is_group:
- # Try names in order of preference, avoiding encoded-looking strings
- potential_names = [
- group_member_contact_name,
- profile_push_name,
- from_jid,
- chat_partner_name,
- ]
-
- # Filter out None values and strings that look like they're encoded
- valid_names = [name for name in potential_names if name and not (
- name.startswith('CK') and any(c.isupper() for c in name[2:]) and '=' in name
- )]
-
- sender_name = next((name for name in valid_names), "Unknown")
- else:
- # For individual chats, prefer partner name or push name
- sender_name = chat_partner_name or profile_push_name or from_jid or "Unknown"
-
- f.write(f'<div class="sender-name">{html.escape(str(sender_name))}</div>')
-
- if text:
- # Replace newline characters with <br> tags for proper display
- escaped_text = html.escape(text)
- f.write(f'<div>{escaped_text.replace(chr(10), "<br>")}</div>')
-
- if media_local_path:
- f.write(get_media_tag(media_local_path, media_path, output_dir))
-
- f.write(f'<div class="timestamp">{convert_whatsapp_timestamp(timestamp)}</div>')
- f.write('</div>')
-
- f.write("""
- </div>
- </div>
- </body>
- </html>
- """)
-
- print(f"Successfully generated HTML for: {chat_name}")
-
-
# Stylesheet embedded in the chat index page.
_INDEX_PAGE_CSS = """\
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
            background-color: #f4f4f9;
            margin: 0;
            padding: 20px;
            min-height: 100vh;
        }
        .header {
            background-color: #128C7E;
            color: white;
            padding: 20px;
            margin: -20px -20px 20px -20px;
            text-align: center;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .header h1 {
            margin: 0;
            font-size: 1.8em;
        }
        .export-info {
            color: rgba(255,255,255,0.9);
            margin-top: 8px;
            font-size: 0.9em;
        }
        .container {
            max-width: 700px;
            margin: auto;
            background: white;
            padding: 20px;
            border-radius: 12px;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
        }
        ul { list-style-type: none; padding: 0; }
        li { margin: 8px 0; }
        .chat-entry {
            text-decoration: none;
            color: #0056b3;
            background-color: #fff;
            padding: 12px;
            border-radius: 8px;
            display: flex;
            align-items: center;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
            transition: all 0.2s ease-in-out;
            gap: 12px;
        }
        a.chat-entry:hover {
            background-color: #e9ecef;
            transform: translateY(-2px);
            box-shadow: 0 4px 8px rgba(0,0,0,0.15);
        }
        .chat-entry.inactive {
            color: #999;
            background-color: #f8f9fa;
            cursor: default;
        }
        .chat-avatar {
            width: 48px;
            height: 48px;
            border-radius: 50%;
            background-size: cover;
            background-position: center;
            flex-shrink: 0;
        }
        .chat-avatar.default-individual {
            background-color: #DFE5E7;
            background-image: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="%23999"><path d="M12 12c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm0 2c-2.67 0-8 1.34-8 4v2h16v-2c0-2.66-5.33-4-8-4z"/></svg>');
        }
        .chat-avatar.default-group {
            background-color: #DFE5E7;
            background-image: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="%23999"><path d="M16 11c1.66 0 2.99-1.34 2.99-3S17.66 5 16 5c-1.66 0-3 1.34-3 3s1.34 3 3 3zm-8 0c1.66 0 2.99-1.34 2.99-3S9.66 5 8 5C6.34 5 5 6.34 5 8s1.34 3 3 3zm0 2c-2.33 0-7 1.17-7 3.5V19h14v-2.5c0-2.33-4.67-3.5-7-3.5zm8 0c-.29 0-.62.02-.97.05 1.16.84 1.97 1.97 1.97 3.45V19h6v-2.5c0-2.33-4.67-3.5-7-3.5z"/></svg>');
        }
        .chat-info {
            flex-grow: 1;
            min-width: 0;
            display: flex;
            flex-direction: column;
            gap: 4px;
        }
        .message-count {
            background-color: #128C7E;
            color: white;
            padding: 4px 8px;
            border-radius: 12px;
            font-size: 0.85em;
            min-width: 24px;
            text-align: center;
        }
        .message-count.zero {
            background-color: #ddd;
        }
        .chat-name {
            font-weight: 500;
        }
        .date-range {
            font-size: 0.8em;
            color: #667781;
        }
        .chat-entry.inactive .date-range {
            color: #999;
        }
"""


def main():
    """Command-line entry point: export every chat and write the index pages.

    Reads ``db_path``, ``media_path`` and ``--output`` from the command line,
    calls ``generate_html_chat`` for each chat whose message counter is
    positive, and writes ``whatsapp-chats.html`` (the chat list) plus a
    redirecting ``index.html`` into the output directory. Prints an error
    and returns early when the database or media directory does not exist.
    """
    parser = argparse.ArgumentParser(description="WhatsApp Chat Exporter")
    parser.add_argument("db_path", help="Path to the ChatStorage.sqlite file.")
    parser.add_argument("media_path", help="Path to the root 'Media' directory.")
    parser.add_argument("--output", default="_html_export", help="Directory to save the HTML files.")

    args = parser.parse_args()

    if not os.path.exists(args.db_path):
        print(f"Error: Database file not found at '{args.db_path}'")
        return

    if not os.path.exists(args.media_path):
        print(f"Error: Media directory not found at '{args.media_path}'")
        return

    os.makedirs(args.output, exist_ok=True)

    # All chats, newest activity first. COALESCE picks the push name when
    # one exists and the partner name otherwise. "MAX(...) IS NULL" sorts
    # chats without dated messages last; it is the portable spelling of
    # "NULLS LAST", which needs SQLite 3.30 or newer.
    chat_query = """
        SELECT
            cs.Z_PK,
            COALESCE(p.ZPUSHNAME, cs.ZPARTNERNAME) AS ChatName,
            cs.ZCONTACTJID,
            cs.ZMESSAGECOUNTER,
            MIN(m.ZMESSAGEDATE) as FirstMessageDate,
            MAX(m.ZMESSAGEDATE) as LastMessageDate
        FROM
            ZWACHATSESSION cs
        LEFT JOIN
            ZWAPROFILEPUSHNAME p ON cs.ZCONTACTJID = p.ZJID
        LEFT JOIN
            ZWAMESSAGE m ON cs.Z_PK = m.ZCHATSESSION
        GROUP BY
            cs.Z_PK, ChatName, cs.ZCONTACTJID, cs.ZMESSAGECOUNTER
        ORDER BY
            MAX(m.ZMESSAGEDATE) IS NULL, LastMessageDate DESC, ChatName
    """
    conn = sqlite3.connect(args.db_path)
    try:
        chats = conn.execute(chat_query).fetchall()
    finally:
        # Close even when the query raises, so the handle is never leaked.
        conn.close()

    print(f"Found {len(chats)} chats to export.")

    exported_on = datetime.now().strftime('%Y-%m-%d %H:%M')
    index_path = os.path.join(args.output, "whatsapp-chats.html")
    with open(index_path, 'w', encoding='utf-8') as index_f:
        index_f.write(f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>WhatsApp Chat Export</title>
    <link rel="icon" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24'><path fill='%23128C7E' d='M12 2C6.5 2 2 6.5 2 12c0 2 .6 3.9 1.6 5.4L2 22l4.6-1.6c1.5 1 3.4 1.6 5.4 1.6 5.5 0 10-4.5 10-10S17.5 2 12 2z'/></svg>">
    <style>
{_INDEX_PAGE_CSS}    </style>
</head>
<body>
    <div class="header">
        <h1>WhatsApp Chat Export</h1>
        <div class="export-info">Exported on {exported_on}</div>
    </div>
    <div class="container">
        <ul>
""")
        for chat_id, chat_name, contact_jid, message_count, first_message_date, last_message_date in chats:
            if not chat_name:
                chat_name = f"Unknown Chat ({contact_jid or chat_id})"

            # The counter column can be NULL; treat that as "no messages"
            # instead of failing on a None comparison.
            message_count = message_count or 0

            # A group chat JID typically contains '@g.us'.
            is_group = bool(contact_jid) and '@g.us' in contact_jid

            # Same sanitising rule generate_html_chat() applies when it
            # names the chat's HTML file: alphanumerics, spaces, hyphens
            # and emojis only.
            safe_filename = "".join(
                c for c in chat_name
                if c.isalnum()
                or c in (' ', '-')
                or '\U0001F300' <= c <= '\U0001FAFF'  # Unicode range for most emojis
            ).rstrip()

            # Default avatar based on chat type.
            avatar_kind = "group" if is_group else "individual"
            avatar_html = f'<div class="chat-avatar default-{avatar_kind}"></div>'
            name_html = html.escape(str(chat_name))

            # Date range shown under the chat name (date part only).
            date_range = ""
            if message_count > 0 and first_message_date and last_message_date:
                first_date = convert_whatsapp_timestamp(first_message_date).split()[0]
                last_date = convert_whatsapp_timestamp(last_message_date).split()[0]
                if first_date == last_date:
                    date_range = first_date
                else:
                    date_range = f"{first_date} – {last_date}"

            exported = False
            if message_count > 0:
                # Generate chat HTML only for chats with messages.
                generate_html_chat(args.db_path, args.media_path, args.output, chat_id, chat_name, is_group)
                # generate_html_chat() writes nothing when the chat has no
                # message rows despite a positive counter; only link to a
                # page that actually exists.
                chat_file = os.path.join(args.output, "chats", f"{safe_filename}.html")
                exported = os.path.isfile(chat_file)

            if exported:
                # Clickable entry with link.
                index_f.write(
                    f'<li><a class="chat-entry" href="chats/{html.escape(safe_filename)}.html">'
                    f'{avatar_html}'
                    f'<div class="chat-info">'
                    f'<span class="chat-name">{name_html}</span>'
                    f'<span class="date-range">{date_range}</span>'
                    f'</div>'
                    f'<span class="message-count">{message_count:,}</span>'
                    f'</a></li>'
                )
            else:
                # Non-clickable entry for empty chats.
                index_f.write(
                    f'<li><div class="chat-entry inactive">'
                    f'{avatar_html}'
                    f'<div class="chat-info">'
                    f'<span class="chat-name">{name_html}</span>'
                    f'<span class="date-range">No messages</span>'
                    f'</div>'
                    f'<span class="message-count zero">0</span>'
                    f'</div></li>'
                )

        index_f.write("</ul></div></body></html>")

    # Create a simple redirect index.html.
    redirect_index = os.path.join(args.output, "index.html")
    with open(redirect_index, 'w', encoding='utf-8') as f:
        f.write("""<!DOCTYPE html>
<html>
<head>
    <meta http-equiv="refresh" content="0; url=whatsapp-chats.html">
    <title>Redirecting to WhatsApp Chats...</title>
</head>
<body>
    <p>Redirecting to <a href="whatsapp-chats.html">WhatsApp Chats</a>...</p>
</body>
</html>""")

    print("\nExport complete!")
    print("View your chats by opening either of these files in your browser:")
    print(f" • {os.path.abspath(index_path)}")
    print(f" • {os.path.abspath(redirect_index)}")
-
-
# Run the exporter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|