From 23820227d5dcf4f8791a31b21bea73a81b2fc725 Mon Sep 17 00:00:00 2001
From: Andreas Demmelbauer <git@notice.at>
Date: Tue, 9 Sep 2025 08:42:23 +0200
Subject: [PATCH] Update README and refactor output path handling in WhatsApp
 archiver

---
 README.md            |   8 ++-
 whatsapp_archiver.py | 159 ++++++++++++++++++++++---------------------
 2 files changed, 85 insertions(+), 82 deletions(-)

diff --git a/README.md b/README.md
index a9baf47..39e7700 100644
--- a/README.md
+++ b/README.md
@@ -33,13 +33,15 @@ The tool navigates this complex structure automatically to extract the WhatsApp
 Run the exporter with your iOS backup path:
 
 ```
-python3 whatsapp_exporter.py --backup-path=./iphone-backup/46de1f4ca4a30b155985910d009edaf586236798/ --output=./export/
+python3 whatsapp_archiver.py \
+  --backup-path="/Volumes/BackupMedia/iphone-backup/46de1f4ca4a30b155985910d009edaf586236798/" \
+  --output-path="/Volumes/BackupMedia/whatsapp-archive/data"
 ```
 
 ### Arguments
 
 - `--backup-path`: Path to the iOS backup directory (containing Manifest.db)
-- `--output`: Directory to save the HTML files (default: _html_export)
+- `--output-path`: Directory in which to save the WhatsApp archive (default: current directory; if an archive already exists there, new data is added to it)
 
 ## Archival Features
 
@@ -54,7 +56,7 @@ This tool acts as an archiver that:
 If you already have extracted WhatsApp database files, you can use them directly. However, it's not well tested:
 
 ```
-python3 whatsapp_exporter.py /path/to/ChatStorage.sqlite /path/to/Media/ --output=./export/
+python3 whatsapp_archiver.py /path/to/ChatStorage.sqlite /path/to/Media/ --output-path=./export/
 ```
 
 ## Limitations and Caveats
diff --git a/whatsapp_archiver.py b/whatsapp_archiver.py
index 8b8547d..15e9ae4 100644
--- a/whatsapp_archiver.py
+++ b/whatsapp_archiver.py
@@ -983,74 +983,75 @@ def process_iphone_backup(backup_path, output_dir):
         'Ranking.sqlite',
         'Sticker.sqlite'
     ]
-    
-    # # Prepare to recreate file structure
-    # for fileID, domain, relativePath in files:
-    #     src_file = os.path.join(backup_path, fileID[:2], fileID)
-    #     dest_file = os.path.join(output_dir, relativePath)
-    #     os.makedirs(os.path.dirname(dest_file), exist_ok=True)
+
+    print('Copying WhatsApp shared files to archive location...')
+    # Prepare to recreate file structure
+    for fileID, domain, relativePath in files:
+        src_file = os.path.join(backup_path, fileID[:2], fileID)
+        dest_file = os.path.join(output_dir, relativePath)
+        os.makedirs(os.path.dirname(dest_file), exist_ok=True)
         
-    #     if not os.path.exists(src_file):
-    #         # print(f"Source file missing: {src_file}")
-    #         skipped_files += 1
-    #         continue
+        if not os.path.exists(src_file):
+            # print(f"Source file missing: {src_file}")
+            skipped_files += 1
+            continue
         
-    #     # Handle SQLite database files specially - merge data instead of overwriting
-    #     file_basename = os.path.basename(dest_file)
-    #     if file_basename in db_files_to_merge and os.path.exists(dest_file):
-    #         special_db_files += 1
-    #         try:
-    #             # For SQLite databases, we need to merge the data
-    #             if file_basename == 'ChatStorage.sqlite':
-    #                 merge_chat_database(src_file, dest_file)
-    #             else:
-    #                 # For other SQLite databases, make a backup and then replace
-    #                 # Future enhancement: implement proper merging for all database types
-    #                 backup_file = f"{dest_file}.backup_{datetime.now().strftime('%Y%m%d%H%M%S')}"
-    #                 shutil.copy2(dest_file, backup_file)
-    #                 print(f"Created backup of {file_basename} as {os.path.basename(backup_file)}")
-    #                 shutil.copy2(src_file, dest_file)
-    #         except Exception as e:
-    #             print(f"Error processing database {dest_file}: {e}")
-    #         continue
+        # Handle SQLite database files specially - merge data instead of overwriting
+        file_basename = os.path.basename(dest_file)
+        if file_basename in db_files_to_merge and os.path.exists(dest_file):
+            special_db_files += 1
+            try:
+                # For SQLite databases, we need to merge the data
+                if file_basename == 'ChatStorage.sqlite':
+                    merge_chat_database(src_file, dest_file)
+                else:
+                    # For other SQLite databases, make a backup and then replace
+                    # Future enhancement: implement proper merging for all database types
+                    backup_file = f"{dest_file}.backup_{datetime.now().strftime('%Y%m%d%H%M%S')}"
+                    shutil.copy2(dest_file, backup_file)
+                    print(f"Created backup of {file_basename} as {os.path.basename(backup_file)}")
+                    shutil.copy2(src_file, dest_file)
+            except Exception as e:
+                print(f"Error processing database {dest_file}: {e}")
+            continue
             
-    #     # For non-database files
-    #     if os.path.exists(dest_file):
-    #         # If file exists, we want to keep the newer one
-    #         # For media files, we always keep them (accumulate data)
-    #         is_media_file = any(relativePath.startswith(prefix) for prefix in ['Media/', 'Message/', 'ProfilePictures/', 'Avatar/'])
+        # For non-database files
+        if os.path.exists(dest_file):
+            # If file exists, we want to keep the newer one
+            # For media files, we always keep them (accumulate data)
+            is_media_file = any(relativePath.startswith(prefix) for prefix in ['Media/', 'Message/', 'ProfilePictures/', 'Avatar/'])
             
-    #         if is_media_file:
-    #             # For media files, don't overwrite but create a version with timestamp if different
-    #             if not files_are_identical(src_file, dest_file):
-    #                 filename, ext = os.path.splitext(dest_file)
-    #                 timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
-    #                 new_dest_file = f"{filename}_{timestamp}{ext}"
-    #                 try:
-    #                     shutil.copy2(src_file, new_dest_file)
-    #                     print(f"Saved additional version of media file: {os.path.relpath(new_dest_file, output_dir)}")
-    #                     new_files += 1
-    #                 except Exception as e:
-    #                     print(f"Error copying alternate version {src_file}: {e}")
-    #                     skipped_files += 1
-    #             else:
-    #                 skipped_files += 1
-    #         else:
-    #             # For non-media files, we'll take the newer one
-    #             try:
-    #                 shutil.copy2(src_file, dest_file)
-    #                 updated_files += 1
-    #             except Exception as e:
-    #                 print(f"Error updating {dest_file}: {e}")
-    #                 skipped_files += 1
-    #     else:
-    #         # If file doesn't exist, copy it
-    #         try:
-    #             shutil.copy2(src_file, dest_file)
-    #             new_files += 1
-    #         except Exception as e:
-    #             print(f"Error copying {src_file} to {dest_file}: {e}")
-    #             skipped_files += 1
+            if is_media_file:
+                # For media files, don't overwrite but create a version with timestamp if different
+                if not files_are_identical(src_file, dest_file):
+                    filename, ext = os.path.splitext(dest_file)
+                    timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
+                    new_dest_file = f"{filename}_{timestamp}{ext}"
+                    try:
+                        shutil.copy2(src_file, new_dest_file)
+                        print(f"Saved additional version of media file: {os.path.relpath(new_dest_file, output_dir)}")
+                        new_files += 1
+                    except Exception as e:
+                        print(f"Error copying alternate version {src_file}: {e}")
+                        skipped_files += 1
+                else:
+                    skipped_files += 1
+            else:
+                # For non-media files, we'll take the newer one
+                try:
+                    shutil.copy2(src_file, dest_file)
+                    updated_files += 1
+                except Exception as e:
+                    print(f"Error updating {dest_file}: {e}")
+                    skipped_files += 1
+        else:
+            # If file doesn't exist, copy it
+            try:
+                shutil.copy2(src_file, dest_file)
+                new_files += 1
+            except Exception as e:
+                print(f"Error copying {src_file} to {dest_file}: {e}")
+                skipped_files += 1
     
     print(f"\nBackup import summary:")
     print(f"- Added {new_files} new files")
@@ -1180,15 +1181,15 @@ def merge_chat_database(src_file, dest_file):
 
 def main():
     parser = argparse.ArgumentParser(description="WhatsApp Chat Exporter")
-    parser.add_argument("--output", default="_html_export", help="Directory to save the HTML files.")
+    parser.add_argument("--output-path", default="./", help="Directory to save the archive")
     parser.add_argument("--backup-path", default=None, help="Path to iPhone backup directory (for manifest.db processing)")
     args = parser.parse_args()
 
     if args.backup_path:
-        process_iphone_backup(args.backup_path, args.output)
+        process_iphone_backup(args.backup_path, args.output_path)
         # Use backup paths for archive creation
-        db_path = os.path.join(args.output, "ChatStorage.sqlite")
-        media_path = os.path.join(args.output, "Message/")
+        db_path = os.path.join(args.output_path, "ChatStorage.sqlite")
+        media_path = os.path.join(args.output_path, "Message/")
     else:
         parser.add_argument("db_path", help="Path to the ChatStorage.sqlite file.")
         parser.add_argument("media_path", help="Path to the root 'Media' directory.")
@@ -1204,7 +1205,7 @@ def main():
         print(f"Error: Media directory not found at '{media_path}'")
         return
 
-    os.makedirs(args.output, exist_ok=True)
+    os.makedirs(args.output_path, exist_ok=True)
 
     conn = sqlite3.connect(db_path)
     cursor = conn.cursor()
@@ -1241,7 +1242,7 @@ def main():
 
     print(f"Found {len(chats)} chats to export.")
 
-    index_path = os.path.join(args.output, "whatsapp-chats.html")
+    index_path = os.path.join(args.output_path, "whatsapp-chats.html")
     with open(index_path, 'w', encoding='utf-8') as index_f:
         index_f.write(f"""
         <!DOCTYPE html>
@@ -1371,12 +1372,12 @@ def main():
         for chat_id, chat_name, contact_jid, message_count, first_message_date, last_message_date, avatar_path in chats:
             if not chat_name:
                 chat_name = f"Unknown Chat ({contact_jid or chat_id})"
-            full_avatar_path = avatar_path if avatar_path and os.path.isabs(avatar_path) else os.path.join(args.output, avatar_path) if avatar_path else None
+            full_avatar_path = avatar_path if avatar_path and os.path.isabs(avatar_path) else os.path.join(args.output_path, avatar_path) if avatar_path else None
 
-            # Find all file paths in args.output that start with full_avatar_path
+            # Find all file paths in args.output_path that start with full_avatar_path
             matching_files = []
             if full_avatar_path:
-                for root, dirs, files in os.walk(args.output):
+                for root, dirs, files in os.walk(args.output_path):
                     for file in files:
                         file_path = os.path.join(root, file)
                         if file_path.startswith(full_avatar_path):
@@ -1384,7 +1385,7 @@ def main():
 
             # Use the first matching file if available
             if matching_files:
-                avatar_path = os.path.relpath(matching_files[0], args.output)
+                avatar_path = os.path.relpath(matching_files[0], args.output_path)
                 full_avatar_path = matching_files[0]
             
             # A group chat JID typically ends with '@g.us'
@@ -1415,10 +1416,10 @@ def main():
             
             if message_count > 0:
                 # Generate chat HTML only for chats with messages
-                generate_html_chat(db_path, media_path, args.output, chat_id, chat_name, is_group, contact_jid)
+                generate_html_chat(db_path, media_path, args.output_path, chat_id, chat_name, is_group, contact_jid)
                 
                 # Generate individual chat media gallery
-                generate_chat_media_gallery(db_path, args.output, chat_id, chat_name, contact_jid)
+                generate_chat_media_gallery(db_path, args.output_path, chat_id, chat_name, contact_jid)
 
                 # Clickable entry with link
                 index_f.write(
@@ -1447,10 +1448,10 @@ def main():
         index_f.write("</ul></div></body></html>")
 
     # Generate the all-media gallery
-    generate_all_media_gallery(db_path, args.output)
+    generate_all_media_gallery(db_path, args.output_path)
 
     # Create a simple redirect index.html
-    redirect_index = os.path.join(args.output, "index.html")
+    redirect_index = os.path.join(args.output_path, "index.html")
     with open(redirect_index, 'w', encoding='utf-8') as f:
         f.write(f"""<!DOCTYPE html>
 <html>
@@ -1468,7 +1469,7 @@ def main():
     print(f"  • {os.path.abspath(index_path)}")
     print(f"  • {os.path.abspath(redirect_index)}")
     print(f"\nAdditional features:")
-    print(f"  • Media Gallery: {os.path.abspath(os.path.join(args.output, 'media-gallery', 'media-gallery.html'))}")
+    print(f"  • Media Gallery: {os.path.abspath(os.path.join(args.output_path, 'media-gallery', 'media-gallery.html'))}")
     print(f"  • Individual chat media galleries available in the media/ folder")