From 23820227d5dcf4f8791a31b21bea73a81b2fc725 Mon Sep 17 00:00:00 2001 From: Andreas Demmelbauer Date: Tue, 9 Sep 2025 08:42:23 +0200 Subject: [PATCH] Update README and refactor output path handling in WhatsApp archiver --- README.md | 8 ++- whatsapp_archiver.py | 159 ++++++++++++++++++++++--------------------- 2 files changed, 85 insertions(+), 82 deletions(-) diff --git a/README.md b/README.md index a9baf47..39e7700 100644 --- a/README.md +++ b/README.md @@ -33,13 +33,15 @@ The tool navigates this complex structure automatically to extract the WhatsApp Run the exporter with your iOS backup path: ``` -python3 whatsapp_exporter.py --backup-path=./iphone-backup/46de1f4ca4a30b155985910d009edaf586236798/ --output=./export/ +python3 whatsapp_exporter.py \ + --backup-path="/Volume/BackupMedia/iphone-backup/46de1f4ca4a30b155985910d009edaf586236798/" \ + --output-path="/Volume/BackupMedia/whatsapp-archive/data" ``` ### Arguments - `--backup-path`: Path to the iOS backup directory (containing Manifest.db) -- `--output`: Directory to save the HTML files (default: _html_export) +- `--output-path`: Directory to save the WhatsApp archive (default: current directory; new data is added if an archive already exists there) ## Archival Features @@ -54,7 +56,7 @@ This tool acts as an archiver that: If you already have extracted WhatsApp database files, you can use them directly. 
However, it's not well tested: ``` -python3 whatsapp_exporter.py /path/to/ChatStorage.sqlite /path/to/Media/ --output=./export/ +python3 whatsapp_exporter.py /path/to/ChatStorage.sqlite /path/to/Media/ --output-path=./export/ ``` ## Limitations and Caveats diff --git a/whatsapp_archiver.py b/whatsapp_archiver.py index 8b8547d..15e9ae4 100644 --- a/whatsapp_archiver.py +++ b/whatsapp_archiver.py @@ -983,74 +983,75 @@ def process_iphone_backup(backup_path, output_dir): 'Ranking.sqlite', 'Sticker.sqlite' ] - - # # Prepare to recreate file structure - # for fileID, domain, relativePath in files: - # src_file = os.path.join(backup_path, fileID[:2], fileID) - # dest_file = os.path.join(output_dir, relativePath) - # os.makedirs(os.path.dirname(dest_file), exist_ok=True) + + print('Copying WhatsApp shared files to archive location...') + # Prepare to recreate file structure + for fileID, domain, relativePath in files: + src_file = os.path.join(backup_path, fileID[:2], fileID) + dest_file = os.path.join(output_dir, relativePath) + os.makedirs(os.path.dirname(dest_file), exist_ok=True) - # if not os.path.exists(src_file): - # # print(f"Source file missing: {src_file}") - # skipped_files += 1 - # continue + if not os.path.exists(src_file): + # print(f"Source file missing: {src_file}") + skipped_files += 1 + continue - # # Handle SQLite database files specially - merge data instead of overwriting - # file_basename = os.path.basename(dest_file) - # if file_basename in db_files_to_merge and os.path.exists(dest_file): - # special_db_files += 1 - # try: - # # For SQLite databases, we need to merge the data - # if file_basename == 'ChatStorage.sqlite': - # merge_chat_database(src_file, dest_file) - # else: - # # For other SQLite databases, make a backup and then replace - # # Future enhancement: implement proper merging for all database types - # backup_file = f"{dest_file}.backup_{datetime.now().strftime('%Y%m%d%H%M%S')}" - # shutil.copy2(dest_file, backup_file) - # 
print(f"Created backup of {file_basename} as {os.path.basename(backup_file)}") - # shutil.copy2(src_file, dest_file) - # except Exception as e: - # print(f"Error processing database {dest_file}: {e}") - # continue + # Handle SQLite database files specially - merge data instead of overwriting + file_basename = os.path.basename(dest_file) + if file_basename in db_files_to_merge and os.path.exists(dest_file): + special_db_files += 1 + try: + # For SQLite databases, we need to merge the data + if file_basename == 'ChatStorage.sqlite': + merge_chat_database(src_file, dest_file) + else: + # For other SQLite databases, make a backup and then replace + # Future enhancement: implement proper merging for all database types + backup_file = f"{dest_file}.backup_{datetime.now().strftime('%Y%m%d%H%M%S')}" + shutil.copy2(dest_file, backup_file) + print(f"Created backup of {file_basename} as {os.path.basename(backup_file)}") + shutil.copy2(src_file, dest_file) + except Exception as e: + print(f"Error processing database {dest_file}: {e}") + continue - # # For non-database files - # if os.path.exists(dest_file): - # # If file exists, we want to keep the newer one - # # For media files, we always keep them (accumulate data) - # is_media_file = any(relativePath.startswith(prefix) for prefix in ['Media/', 'Message/', 'ProfilePictures/', 'Avatar/']) + # For non-database files + if os.path.exists(dest_file): + # If file exists, we want to keep the newer one + # For media files, we always keep them (accumulate data) + is_media_file = any(relativePath.startswith(prefix) for prefix in ['Media/', 'Message/', 'ProfilePictures/', 'Avatar/']) - # if is_media_file: - # # For media files, don't overwrite but create a version with timestamp if different - # if not files_are_identical(src_file, dest_file): - # filename, ext = os.path.splitext(dest_file) - # timestamp = datetime.now().strftime('%Y%m%d%H%M%S') - # new_dest_file = f"{filename}_{timestamp}{ext}" - # try: - # shutil.copy2(src_file, 
new_dest_file) - # print(f"Saved additional version of media file: {os.path.relpath(new_dest_file, output_dir)}") - # new_files += 1 - # except Exception as e: - # print(f"Error copying alternate version {src_file}: {e}") - # skipped_files += 1 - # else: - # skipped_files += 1 - # else: - # # For non-media files, we'll take the newer one - # try: - # shutil.copy2(src_file, dest_file) - # updated_files += 1 - # except Exception as e: - # print(f"Error updating {dest_file}: {e}") - # skipped_files += 1 - # else: - # # If file doesn't exist, copy it - # try: - # shutil.copy2(src_file, dest_file) - # new_files += 1 - # except Exception as e: - # print(f"Error copying {src_file} to {dest_file}: {e}") - # skipped_files += 1 + if is_media_file: + # For media files, don't overwrite but create a version with timestamp if different + if not files_are_identical(src_file, dest_file): + filename, ext = os.path.splitext(dest_file) + timestamp = datetime.now().strftime('%Y%m%d%H%M%S') + new_dest_file = f"{filename}_{timestamp}{ext}" + try: + shutil.copy2(src_file, new_dest_file) + print(f"Saved additional version of media file: {os.path.relpath(new_dest_file, output_dir)}") + new_files += 1 + except Exception as e: + print(f"Error copying alternate version {src_file}: {e}") + skipped_files += 1 + else: + skipped_files += 1 + else: + # For non-media files, we'll take the newer one + try: + shutil.copy2(src_file, dest_file) + updated_files += 1 + except Exception as e: + print(f"Error updating {dest_file}: {e}") + skipped_files += 1 + else: + # If file doesn't exist, copy it + try: + shutil.copy2(src_file, dest_file) + new_files += 1 + except Exception as e: + print(f"Error copying {src_file} to {dest_file}: {e}") + skipped_files += 1 print(f"\nBackup import summary:") print(f"- Added {new_files} new files") @@ -1180,15 +1181,15 @@ def merge_chat_database(src_file, dest_file): def main(): parser = argparse.ArgumentParser(description="WhatsApp Chat Exporter") - 
parser.add_argument("--output", default="_html_export", help="Directory to save the HTML files.") + parser.add_argument("--output-path", default="./", help="Directory to save the archive") parser.add_argument("--backup-path", default=None, help="Path to iPhone backup directory (for manifest.db processing)") args = parser.parse_args() if args.backup_path: - process_iphone_backup(args.backup_path, args.output) + process_iphone_backup(args.backup_path, args.output_path) # Use backup paths for archive creation - db_path = os.path.join(args.output, "ChatStorage.sqlite") - media_path = os.path.join(args.output, "Message/") + db_path = os.path.join(args.output_path, "ChatStorage.sqlite") + media_path = os.path.join(args.output_path, "Message/") else: parser.add_argument("db_path", help="Path to the ChatStorage.sqlite file.") parser.add_argument("media_path", help="Path to the root 'Media' directory.") @@ -1204,7 +1205,7 @@ def main(): print(f"Error: Media directory not found at '{media_path}'") return - os.makedirs(args.output, exist_ok=True) + os.makedirs(args.output_path, exist_ok=True) conn = sqlite3.connect(db_path) cursor = conn.cursor() @@ -1241,7 +1242,7 @@ def main(): print(f"Found {len(chats)} chats to export.") - index_path = os.path.join(args.output, "whatsapp-chats.html") + index_path = os.path.join(args.output_path, "whatsapp-chats.html") with open(index_path, 'w', encoding='utf-8') as index_f: index_f.write(f""" @@ -1371,12 +1372,12 @@ def main(): for chat_id, chat_name, contact_jid, message_count, first_message_date, last_message_date, avatar_path in chats: if not chat_name: chat_name = f"Unknown Chat ({contact_jid or chat_id})" - full_avatar_path = avatar_path if avatar_path and os.path.isabs(avatar_path) else os.path.join(args.output, avatar_path) if avatar_path else None + full_avatar_path = avatar_path if avatar_path and os.path.isabs(avatar_path) else os.path.join(args.output_path, avatar_path) if avatar_path else None - # Find all file paths in 
args.output that start with full_avatar_path + # Find all file paths in args.output_path that start with full_avatar_path matching_files = [] if full_avatar_path: - for root, dirs, files in os.walk(args.output): + for root, dirs, files in os.walk(args.output_path): for file in files: file_path = os.path.join(root, file) if file_path.startswith(full_avatar_path): @@ -1384,7 +1385,7 @@ def main(): # Use the first matching file if available if matching_files: - avatar_path = os.path.relpath(matching_files[0], args.output) + avatar_path = os.path.relpath(matching_files[0], args.output_path) full_avatar_path = matching_files[0] # A group chat JID typically ends with '@g.us' @@ -1415,10 +1416,10 @@ def main(): if message_count > 0: # Generate chat HTML only for chats with messages - generate_html_chat(db_path, media_path, args.output, chat_id, chat_name, is_group, contact_jid) + generate_html_chat(db_path, media_path, args.output_path, chat_id, chat_name, is_group, contact_jid) # Generate individual chat media gallery - generate_chat_media_gallery(db_path, args.output, chat_id, chat_name, contact_jid) + generate_chat_media_gallery(db_path, args.output_path, chat_id, chat_name, contact_jid) # Clickable entry with link index_f.write( @@ -1447,10 +1448,10 @@ def main(): index_f.write("") # Generate the all-media gallery - generate_all_media_gallery(db_path, args.output) + generate_all_media_gallery(db_path, args.output_path) # Create a simple redirect index.html - redirect_index = os.path.join(args.output, "index.html") + redirect_index = os.path.join(args.output_path, "index.html") with open(redirect_index, 'w', encoding='utf-8') as f: f.write(f""" @@ -1468,7 +1469,7 @@ def main(): print(f" • {os.path.abspath(index_path)}") print(f" • {os.path.abspath(redirect_index)}") print(f"\nAdditional features:") - print(f" • Media Gallery: {os.path.abspath(os.path.join(args.output, 'media-gallery', 'media-gallery.html'))}") + print(f" • Media Gallery: 
{os.path.abspath(os.path.join(args.output_path, 'media-gallery', 'media-gallery.html'))}") print(f" • Individual chat media galleries available in the media/ folder")