Update README and refactor output path handling in WhatsApp archiver

2 kuukautta sitten · 23820227d5
--- a/README.md
+++ b/README.md
@@ -33,13 +33,15 @@ The tool navigates this complex structure automatically to extract the WhatsApp
 Run the exporter with your iOS backup path:

 ```
 python3 whatsapp_exporter.py --backup-path=./iphone-backup/46de1f4ca4a30b155985910d009edaf586236798/ --output=./export/
 python3 whatsapp_exporter.py \
  --backup-path="/Volume/BackupMedia/iphone-backup/46de1f4ca4a30b155985910d009edaf586236798/" \
  --output-path="/Volume/BackupMedia/whatsapp-archive/data"
 ```

 ### Arguments

 - `--backup-path`: Path to the iOS backup directory (containing Manifest.db)
 - `--output`: Directory to save the HTML files (default: _html_export)
 - `--output-path`: Directory in which to save the WhatsApp archive (if it already exists, new data is appended to it)

 ## Archival Features

@@ -54,7 +56,7 @@ This tool acts as an archiver that:
 If you already have extracted WhatsApp database files, you can use them directly. However, this mode is not well tested:

 ```
 python3 whatsapp_exporter.py /path/to/ChatStorage.sqlite /path/to/Media/ --output=./export/
 python3 whatsapp_exporter.py /path/to/ChatStorage.sqlite /path/to/Media/ --output-path=./export/
 ```

 ## Limitations and Caveats
--- a/whatsapp_archiver.py
+++ b/whatsapp_archiver.py
@@ -983,74 +983,75 @@ def process_iphone_backup(backup_path, output_dir):
        'Ranking.sqlite',
        'Sticker.sqlite'
    ]
    
    # # Prepare to recreate file structure
    # for fileID, domain, relativePath in files:
    #     src_file = os.path.join(backup_path, fileID[:2], fileID)
    #     dest_file = os.path.join(output_dir, relativePath)
    #     os.makedirs(os.path.dirname(dest_file), exist_ok=True)

    print('Copying WhatsApp shared files to archive location...')
    # Prepare to recreate file structure
    for fileID, domain, relativePath in files:
        src_file = os.path.join(backup_path, fileID[:2], fileID)
        dest_file = os.path.join(output_dir, relativePath)
        os.makedirs(os.path.dirname(dest_file), exist_ok=True)
        
    #     if not os.path.exists(src_file):
    #         # print(f"Source file missing: {src_file}")
    #         skipped_files += 1
    #         continue
        if not os.path.exists(src_file):
            # print(f"Source file missing: {src_file}")
            skipped_files += 1
            continue
        
    #     # Handle SQLite database files specially - merge data instead of overwriting
    #     file_basename = os.path.basename(dest_file)
    #     if file_basename in db_files_to_merge and os.path.exists(dest_file):
    #         special_db_files += 1
    #         try:
    #             # For SQLite databases, we need to merge the data
    #             if file_basename == 'ChatStorage.sqlite':
    #                 merge_chat_database(src_file, dest_file)
    #             else:
    #                 # For other SQLite databases, make a backup and then replace
    #                 # Future enhancement: implement proper merging for all database types
    #                 backup_file = f"{dest_file}.backup_{datetime.now().strftime('%Y%m%d%H%M%S')}"
    #                 shutil.copy2(dest_file, backup_file)
    #                 print(f"Created backup of {file_basename} as {os.path.basename(backup_file)}")
    #                 shutil.copy2(src_file, dest_file)
    #         except Exception as e:
    #             print(f"Error processing database {dest_file}: {e}")
    #         continue
        # Handle SQLite database files specially - merge data instead of overwriting
        file_basename = os.path.basename(dest_file)
        if file_basename in db_files_to_merge and os.path.exists(dest_file):
            special_db_files += 1
            try:
                # For SQLite databases, we need to merge the data
                if file_basename == 'ChatStorage.sqlite':
                    merge_chat_database(src_file, dest_file)
                else:
                    # For other SQLite databases, make a backup and then replace
                    # Future enhancement: implement proper merging for all database types
                    backup_file = f"{dest_file}.backup_{datetime.now().strftime('%Y%m%d%H%M%S')}"
                    shutil.copy2(dest_file, backup_file)
                    print(f"Created backup of {file_basename} as {os.path.basename(backup_file)}")
                    shutil.copy2(src_file, dest_file)
            except Exception as e:
                print(f"Error processing database {dest_file}: {e}")
            continue
            
    #     # For non-database files
    #     if os.path.exists(dest_file):
    #         # If file exists, we want to keep the newer one
    #         # For media files, we always keep them (accumulate data)
    #         is_media_file = any(relativePath.startswith(prefix) for prefix in ['Media/', 'Message/', 'ProfilePictures/', 'Avatar/'])
        # For non-database files
        if os.path.exists(dest_file):
            # If file exists, we want to keep the newer one
            # For media files, we always keep them (accumulate data)
            is_media_file = any(relativePath.startswith(prefix) for prefix in ['Media/', 'Message/', 'ProfilePictures/', 'Avatar/'])
            
    #         if is_media_file:
    #             # For media files, don't overwrite but create a version with timestamp if different
    #             if not files_are_identical(src_file, dest_file):
    #                 filename, ext = os.path.splitext(dest_file)
    #                 timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
    #                 new_dest_file = f"{filename}_{timestamp}{ext}"
    #                 try:
    #                     shutil.copy2(src_file, new_dest_file)
    #                     print(f"Saved additional version of media file: {os.path.relpath(new_dest_file, output_dir)}")
    #                     new_files += 1
    #                 except Exception as e:
    #                     print(f"Error copying alternate version {src_file}: {e}")
    #                     skipped_files += 1
    #             else:
    #                 skipped_files += 1
    #         else:
    #             # For non-media files, we'll take the newer one
    #             try:
    #                 shutil.copy2(src_file, dest_file)
    #                 updated_files += 1
    #             except Exception as e:
    #                 print(f"Error updating {dest_file}: {e}")
    #                 skipped_files += 1
    #     else:
    #         # If file doesn't exist, copy it
    #         try:
    #             shutil.copy2(src_file, dest_file)
    #             new_files += 1
    #         except Exception as e:
    #             print(f"Error copying {src_file} to {dest_file}: {e}")
    #             skipped_files += 1
            if is_media_file:
                # For media files, don't overwrite but create a version with timestamp if different
                if not files_are_identical(src_file, dest_file):
                    filename, ext = os.path.splitext(dest_file)
                    timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
                    new_dest_file = f"{filename}_{timestamp}{ext}"
                    try:
                        shutil.copy2(src_file, new_dest_file)
                        print(f"Saved additional version of media file: {os.path.relpath(new_dest_file, output_dir)}")
                        new_files += 1
                    except Exception as e:
                        print(f"Error copying alternate version {src_file}: {e}")
                        skipped_files += 1
                else:
                    skipped_files += 1
            else:
                # For non-media files, we'll take the newer one
                try:
                    shutil.copy2(src_file, dest_file)
                    updated_files += 1
                except Exception as e:
                    print(f"Error updating {dest_file}: {e}")
                    skipped_files += 1
        else:
            # If file doesn't exist, copy it
            try:
                shutil.copy2(src_file, dest_file)
                new_files += 1
            except Exception as e:
                print(f"Error copying {src_file} to {dest_file}: {e}")
                skipped_files += 1
    
    print(f"\nBackup import summary:")
    print(f"- Added {new_files} new files")
@@ -1180,15 +1181,15 @@ def merge_chat_database(src_file, dest_file):

 def main():
    parser = argparse.ArgumentParser(description="WhatsApp Chat Exporter")
    parser.add_argument("--output", default="_html_export", help="Directory to save the HTML files.")
    parser.add_argument("--output-path", default="./", help="Directory to save the archive")
    parser.add_argument("--backup-path", default=None, help="Path to iPhone backup directory (for manifest.db processing)")
    args = parser.parse_args()

    if args.backup_path:
        process_iphone_backup(args.backup_path, args.output)
        process_iphone_backup(args.backup_path, args.output_path)
        # Use backup paths for archive creation
        db_path = os.path.join(args.output, "ChatStorage.sqlite")
        media_path = os.path.join(args.output, "Message/")
        db_path = os.path.join(args.output_path, "ChatStorage.sqlite")
        media_path = os.path.join(args.output_path, "Message/")
    else:
        parser.add_argument("db_path", help="Path to the ChatStorage.sqlite file.")
        parser.add_argument("media_path", help="Path to the root 'Media' directory.")
@@ -1204,7 +1205,7 @@ def main():
        print(f"Error: Media directory not found at '{media_path}'")
        return

    os.makedirs(args.output, exist_ok=True)
    os.makedirs(args.output_path, exist_ok=True)

    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
@@ -1241,7 +1242,7 @@ def main():

    print(f"Found {len(chats)} chats to export.")

    index_path = os.path.join(args.output, "whatsapp-chats.html")
    index_path = os.path.join(args.output_path, "whatsapp-chats.html")
    with open(index_path, 'w', encoding='utf-8') as index_f:
        index_f.write(f"""
        <!DOCTYPE html>
@@ -1371,12 +1372,12 @@ def main():
        for chat_id, chat_name, contact_jid, message_count, first_message_date, last_message_date, avatar_path in chats:
            if not chat_name:
                chat_name = f"Unknown Chat ({contact_jid or chat_id})"
            full_avatar_path = avatar_path if avatar_path and os.path.isabs(avatar_path) else os.path.join(args.output, avatar_path) if avatar_path else None
            full_avatar_path = avatar_path if avatar_path and os.path.isabs(avatar_path) else os.path.join(args.output_path, avatar_path) if avatar_path else None

            # Find all file paths in args.output that start with full_avatar_path
            # Find all file paths in args.output_path that start with full_avatar_path
            matching_files = []
            if full_avatar_path:
                for root, dirs, files in os.walk(args.output):
                for root, dirs, files in os.walk(args.output_path):
                    for file in files:
                        file_path = os.path.join(root, file)
                        if file_path.startswith(full_avatar_path):
@@ -1384,7 +1385,7 @@ def main():

            # Use the first matching file if available
            if matching_files:
                avatar_path = os.path.relpath(matching_files[0], args.output)
                avatar_path = os.path.relpath(matching_files[0], args.output_path)
                full_avatar_path = matching_files[0]
            
            # A group chat JID typically ends with '@g.us'
@@ -1415,10 +1416,10 @@ def main():
            
            if message_count > 0:
                # Generate chat HTML only for chats with messages
                generate_html_chat(db_path, media_path, args.output, chat_id, chat_name, is_group, contact_jid)
                generate_html_chat(db_path, media_path, args.output_path, chat_id, chat_name, is_group, contact_jid)
                
                # Generate individual chat media gallery
                generate_chat_media_gallery(db_path, args.output, chat_id, chat_name, contact_jid)
                generate_chat_media_gallery(db_path, args.output_path, chat_id, chat_name, contact_jid)

                # Clickable entry with link
                index_f.write(
@@ -1447,10 +1448,10 @@ def main():
        index_f.write("</ul></div></body></html>")

    # Generate the all-media gallery
    generate_all_media_gallery(db_path, args.output)
    generate_all_media_gallery(db_path, args.output_path)

    # Create a simple redirect index.html
    redirect_index = os.path.join(args.output, "index.html")
    redirect_index = os.path.join(args.output_path, "index.html")
    with open(redirect_index, 'w', encoding='utf-8') as f:
        f.write(f"""<!DOCTYPE html>
 <html>
@@ -1468,7 +1469,7 @@ def main():
    print(f"  • {os.path.abspath(index_path)}")
    print(f"  • {os.path.abspath(redirect_index)}")
    print(f"\nAdditional features:")
    print(f"  • Media Gallery: {os.path.abspath(os.path.join(args.output, 'media-gallery', 'media-gallery.html'))}")
    print(f"  • Media Gallery: {os.path.abspath(os.path.join(args.output_path, 'media-gallery', 'media-gallery.html'))}")
    print(f"  • Individual chat media galleries available in the media/ folder")