Selaa lähdekoodia

Update README and refactor output path handling in WhatsApp archiver

main
Andreas Demmelbauer 1 viikko sitten
vanhempi
commit
23820227d5
2 muutettua tiedostoa jossa 85 lisäystä ja 82 poistoa
  1. +5
    -3
      README.md
  2. +80
    -79
      whatsapp_archiver.py

+ 5
- 3
README.md Näytä tiedosto

@@ -33,13 +33,15 @@ The tool navigates this complex structure automatically to extract the WhatsApp
Run the exporter with your iOS backup path:

```
python3 whatsapp_exporter.py --backup-path=./iphone-backup/46de1f4ca4a30b155985910d009edaf586236798/ --output=./export/
python3 whatsapp_exporter.py \
--backup-path="/Volume/BackupMedia/iphone-backup/46de1f4ca4a30b155985910d009edaf586236798/" \
--output-path="/Volume/BackupMedia/whatsapp-archive/data"
```

### Arguments

- `--backup-path`: Path to the iOS backup directory (containing Manifest.db)
- `--output`: Directory to save the HTML files (default: _html_export)
- `--output-path`: Directory in which to save the WhatsApp archive (new data is appended if the directory already exists)

## Archival Features

@@ -54,7 +56,7 @@ This tool acts as an archiver that:
If you have already extracted the WhatsApp database files, you can use them directly. However, this mode is not well tested:

```
python3 whatsapp_exporter.py /path/to/ChatStorage.sqlite /path/to/Media/ --output=./export/
python3 whatsapp_exporter.py /path/to/ChatStorage.sqlite /path/to/Media/ --output-path=./export/
```

## Limitations and Caveats


+ 80
- 79
whatsapp_archiver.py Näytä tiedosto

@@ -983,74 +983,75 @@ def process_iphone_backup(backup_path, output_dir):
'Ranking.sqlite',
'Sticker.sqlite'
]
# # Prepare to recreate file structure
# for fileID, domain, relativePath in files:
# src_file = os.path.join(backup_path, fileID[:2], fileID)
# dest_file = os.path.join(output_dir, relativePath)
# os.makedirs(os.path.dirname(dest_file), exist_ok=True)

print('Copying WhatsApp shared files to archive location...')
# Prepare to recreate file structure
for fileID, domain, relativePath in files:
src_file = os.path.join(backup_path, fileID[:2], fileID)
dest_file = os.path.join(output_dir, relativePath)
os.makedirs(os.path.dirname(dest_file), exist_ok=True)
# if not os.path.exists(src_file):
# # print(f"Source file missing: {src_file}")
# skipped_files += 1
# continue
if not os.path.exists(src_file):
# print(f"Source file missing: {src_file}")
skipped_files += 1
continue
# # Handle SQLite database files specially - merge data instead of overwriting
# file_basename = os.path.basename(dest_file)
# if file_basename in db_files_to_merge and os.path.exists(dest_file):
# special_db_files += 1
# try:
# # For SQLite databases, we need to merge the data
# if file_basename == 'ChatStorage.sqlite':
# merge_chat_database(src_file, dest_file)
# else:
# # For other SQLite databases, make a backup and then replace
# # Future enhancement: implement proper merging for all database types
# backup_file = f"{dest_file}.backup_{datetime.now().strftime('%Y%m%d%H%M%S')}"
# shutil.copy2(dest_file, backup_file)
# print(f"Created backup of {file_basename} as {os.path.basename(backup_file)}")
# shutil.copy2(src_file, dest_file)
# except Exception as e:
# print(f"Error processing database {dest_file}: {e}")
# continue
# Handle SQLite database files specially - merge data instead of overwriting
file_basename = os.path.basename(dest_file)
if file_basename in db_files_to_merge and os.path.exists(dest_file):
special_db_files += 1
try:
# For SQLite databases, we need to merge the data
if file_basename == 'ChatStorage.sqlite':
merge_chat_database(src_file, dest_file)
else:
# For other SQLite databases, make a backup and then replace
# Future enhancement: implement proper merging for all database types
backup_file = f"{dest_file}.backup_{datetime.now().strftime('%Y%m%d%H%M%S')}"
shutil.copy2(dest_file, backup_file)
print(f"Created backup of {file_basename} as {os.path.basename(backup_file)}")
shutil.copy2(src_file, dest_file)
except Exception as e:
print(f"Error processing database {dest_file}: {e}")
continue
# # For non-database files
# if os.path.exists(dest_file):
# # If file exists, we want to keep the newer one
# # For media files, we always keep them (accumulate data)
# is_media_file = any(relativePath.startswith(prefix) for prefix in ['Media/', 'Message/', 'ProfilePictures/', 'Avatar/'])
# For non-database files
if os.path.exists(dest_file):
# If file exists, we want to keep the newer one
# For media files, we always keep them (accumulate data)
is_media_file = any(relativePath.startswith(prefix) for prefix in ['Media/', 'Message/', 'ProfilePictures/', 'Avatar/'])
# if is_media_file:
# # For media files, don't overwrite but create a version with timestamp if different
# if not files_are_identical(src_file, dest_file):
# filename, ext = os.path.splitext(dest_file)
# timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
# new_dest_file = f"{filename}_{timestamp}{ext}"
# try:
# shutil.copy2(src_file, new_dest_file)
# print(f"Saved additional version of media file: {os.path.relpath(new_dest_file, output_dir)}")
# new_files += 1
# except Exception as e:
# print(f"Error copying alternate version {src_file}: {e}")
# skipped_files += 1
# else:
# skipped_files += 1
# else:
# # For non-media files, we'll take the newer one
# try:
# shutil.copy2(src_file, dest_file)
# updated_files += 1
# except Exception as e:
# print(f"Error updating {dest_file}: {e}")
# skipped_files += 1
# else:
# # If file doesn't exist, copy it
# try:
# shutil.copy2(src_file, dest_file)
# new_files += 1
# except Exception as e:
# print(f"Error copying {src_file} to {dest_file}: {e}")
# skipped_files += 1
if is_media_file:
# For media files, don't overwrite but create a version with timestamp if different
if not files_are_identical(src_file, dest_file):
filename, ext = os.path.splitext(dest_file)
timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
new_dest_file = f"{filename}_{timestamp}{ext}"
try:
shutil.copy2(src_file, new_dest_file)
print(f"Saved additional version of media file: {os.path.relpath(new_dest_file, output_dir)}")
new_files += 1
except Exception as e:
print(f"Error copying alternate version {src_file}: {e}")
skipped_files += 1
else:
skipped_files += 1
else:
# For non-media files, we'll take the newer one
try:
shutil.copy2(src_file, dest_file)
updated_files += 1
except Exception as e:
print(f"Error updating {dest_file}: {e}")
skipped_files += 1
else:
# If file doesn't exist, copy it
try:
shutil.copy2(src_file, dest_file)
new_files += 1
except Exception as e:
print(f"Error copying {src_file} to {dest_file}: {e}")
skipped_files += 1
print(f"\nBackup import summary:")
print(f"- Added {new_files} new files")
@@ -1180,15 +1181,15 @@ def merge_chat_database(src_file, dest_file):

def main():
parser = argparse.ArgumentParser(description="WhatsApp Chat Exporter")
parser.add_argument("--output", default="_html_export", help="Directory to save the HTML files.")
parser.add_argument("--output-path", default="./", help="Directory to save the archive")
parser.add_argument("--backup-path", default=None, help="Path to iPhone backup directory (for manifest.db processing)")
args = parser.parse_args()

if args.backup_path:
process_iphone_backup(args.backup_path, args.output)
process_iphone_backup(args.backup_path, args.output_path)
# Use backup paths for archive creation
db_path = os.path.join(args.output, "ChatStorage.sqlite")
media_path = os.path.join(args.output, "Message/")
db_path = os.path.join(args.output_path, "ChatStorage.sqlite")
media_path = os.path.join(args.output_path, "Message/")
else:
parser.add_argument("db_path", help="Path to the ChatStorage.sqlite file.")
parser.add_argument("media_path", help="Path to the root 'Media' directory.")
@@ -1204,7 +1205,7 @@ def main():
print(f"Error: Media directory not found at '{media_path}'")
return

os.makedirs(args.output, exist_ok=True)
os.makedirs(args.output_path, exist_ok=True)

conn = sqlite3.connect(db_path)
cursor = conn.cursor()
@@ -1241,7 +1242,7 @@ def main():

print(f"Found {len(chats)} chats to export.")

index_path = os.path.join(args.output, "whatsapp-chats.html")
index_path = os.path.join(args.output_path, "whatsapp-chats.html")
with open(index_path, 'w', encoding='utf-8') as index_f:
index_f.write(f"""
<!DOCTYPE html>
@@ -1371,12 +1372,12 @@ def main():
for chat_id, chat_name, contact_jid, message_count, first_message_date, last_message_date, avatar_path in chats:
if not chat_name:
chat_name = f"Unknown Chat ({contact_jid or chat_id})"
full_avatar_path = avatar_path if avatar_path and os.path.isabs(avatar_path) else os.path.join(args.output, avatar_path) if avatar_path else None
full_avatar_path = avatar_path if avatar_path and os.path.isabs(avatar_path) else os.path.join(args.output_path, avatar_path) if avatar_path else None

# Find all file paths in args.output that start with full_avatar_path
# Find all file paths in args.output_path that start with full_avatar_path
matching_files = []
if full_avatar_path:
for root, dirs, files in os.walk(args.output):
for root, dirs, files in os.walk(args.output_path):
for file in files:
file_path = os.path.join(root, file)
if file_path.startswith(full_avatar_path):
@@ -1384,7 +1385,7 @@ def main():

# Use the first matching file if available
if matching_files:
avatar_path = os.path.relpath(matching_files[0], args.output)
avatar_path = os.path.relpath(matching_files[0], args.output_path)
full_avatar_path = matching_files[0]
# A group chat JID typically ends with '@g.us'
@@ -1415,10 +1416,10 @@ def main():
if message_count > 0:
# Generate chat HTML only for chats with messages
generate_html_chat(db_path, media_path, args.output, chat_id, chat_name, is_group, contact_jid)
generate_html_chat(db_path, media_path, args.output_path, chat_id, chat_name, is_group, contact_jid)
# Generate individual chat media gallery
generate_chat_media_gallery(db_path, args.output, chat_id, chat_name, contact_jid)
generate_chat_media_gallery(db_path, args.output_path, chat_id, chat_name, contact_jid)

# Clickable entry with link
index_f.write(
@@ -1447,10 +1448,10 @@ def main():
index_f.write("</ul></div></body></html>")

# Generate the all-media gallery
generate_all_media_gallery(db_path, args.output)
generate_all_media_gallery(db_path, args.output_path)

# Create a simple redirect index.html
redirect_index = os.path.join(args.output, "index.html")
redirect_index = os.path.join(args.output_path, "index.html")
with open(redirect_index, 'w', encoding='utf-8') as f:
f.write(f"""<!DOCTYPE html>
<html>
@@ -1468,7 +1469,7 @@ def main():
print(f" • {os.path.abspath(index_path)}")
print(f" • {os.path.abspath(redirect_index)}")
print(f"\nAdditional features:")
print(f" • Media Gallery: {os.path.abspath(os.path.join(args.output, 'media-gallery', 'media-gallery.html'))}")
print(f" • Media Gallery: {os.path.abspath(os.path.join(args.output_path, 'media-gallery', 'media-gallery.html'))}")
print(f" • Individual chat media galleries available in the media/ folder")




Ladataan…
Peruuta
Tallenna