#!/usr/bin/env python3
"""Archive iPhone photos from a local unencrypted backup and generate an HTML gallery."""

import argparse
import os
import re
import shutil
import sqlite3
import struct
from collections import defaultdict
from contextlib import closing
from datetime import datetime
from pathlib import Path

# Marker that precedes the TIFF structure inside a JPEG APP1 segment.
_EXIF_HEADER = b'Exif\x00\x00'

# Markers tried when looking for an EXIF payload inside a HEIC container.
_HEIC_EXIF_MARKERS = (b'Exif\x00\x00', b'Exif\x00\x01', b'EXIF\x00\x00')

# Layout of an EXIF timestamp: "YYYY:MM:DD HH:MM:SS".
_EXIF_DATE_FORMAT = '%Y:%m:%d %H:%M:%S'

# Date tags in the order they are trusted as "date taken".
# NOTE(review): 0x0306 is kept from the original tag table ("Additional
# DateTime tag"); it may be a mix-up with 0x0132 == 306 decimal - confirm.
_DATE_TAG_PRIORITY = (
    (0x9003, 'DateTimeOriginal'),
    (0x9004, 'DateTimeDigitized'),
    (0x0132, 'DateTime'),
    (0x0306, 'DateTime'),
)

# Tag in IFD0 whose value is the offset of the Exif sub-IFD.
_EXIF_IFD_POINTER_TAG = 0x8769

# PNG text-chunk keywords that are treated as carrying a timestamp.
_ITXT_DATE_KEYWORDS = frozenset(
    ['date', 'datetime', 'creation time', 'date:create', 'exif:datetime']
)
_TEXT_DATE_KEYWORDS = frozenset(['date', 'creation time', 'timestamp'])

# EXIF-style timestamp embedded in raw bytes, e.g. b"2024:08:15 14:30:45".
_RAW_DATETIME_RE = re.compile(rb'(20\d{2}:\d{2}:\d{2})\s+(\d{2}:\d{2}:\d{2})')

# EXIF- or ISO-style timestamp inside PNG text chunks.
_TEXT_DATETIME_RE = re.compile(
    r'(20\d{2})[:-](\d{2})[:-](\d{2})[\sT](\d{2}:\d{2}:\d{2})'
)

# File suffixes (lower-case) for which an EXIF lookup is attempted.
_EXIF_SUFFIXES = frozenset(['.jpg', '.jpeg', '.heic', '.png'])

# Extensions collected from the DCIM folders, in glob order.
_ORIGINAL_EXTENSIONS = ('.HEIC', '.JPG', '.PNG', '.MOV', '.MP4', '.JPEG')


def read_exif_date(file_path):
    """Return the capture date stored in an image file, if one can be found.

    The format is detected from the first 12 bytes of the file and the
    matching reader (JPEG, HEIC or PNG) is used.

    Args:
        file_path: Path of the image file to inspect.

    Returns:
        A date string, normally shaped like "YYYY:MM:DD HH:MM:SS", or None
        when the file cannot be read, is not a recognised format, or holds
        no date.
    """
    try:
        with open(file_path, 'rb') as f:
            header = f.read(12)
            if header.startswith(b'\xff\xd8'):
                # Walk the segments properly; fall back to a byte scan for
                # files whose segment chain is damaged.
                return _read_jpeg_exif_date(f) or _scan_jpeg_exif_date(f)
            if header.startswith(b'\xff\xe1') and b'Exif' in header:
                # Bare APP1 segment without the leading SOI marker.
                return _read_jpeg_exif_date(f)
            if header[4:8] == b'ftyp':
                return _read_heic_exif_date(f)
            if header.startswith(b'\x89PNG'):
                return _read_png_exif_date(f)
    except Exception:
        # Deliberately best-effort: callers fall back to the file's
        # modification time when no date can be extracted.
        return None
    return None


def _is_exif_datetime(value):
    """Return True when value parses as "YYYY:MM:DD HH:MM:SS"."""
    try:
        datetime.strptime(value, _EXIF_DATE_FORMAT)
    except ValueError:
        return False
    return True


def _read_jpeg_exif_date(f):
    """Walk the JPEG segments from the start of f and parse the EXIF APP1 one.

    Returns the date found in the first APP1 segment that starts with the
    EXIF header, or None when no such segment is reached.
    """
    f.seek(0)
    while True:
        marker = f.read(2)
        if len(marker) < 2 or marker[0] != 0xFF:
            return None
        if marker[1] in (0xD8, 0xD9):
            # SOI and EOI carry no length field.
            continue
        length_bytes = f.read(2)
        if len(length_bytes) < 2:
            return None
        length = struct.unpack('>H', length_bytes)[0]
        if length < 2:
            # The length includes its own two bytes; anything smaller is
            # corrupt and would otherwise produce a negative read size.
            return None
        if marker[1] == 0xE1:
            payload = f.read(length - 2)
            if payload.startswith(_EXIF_HEADER):
                return _parse_exif_data(payload[len(_EXIF_HEADER):])
        else:
            f.seek(length - 2, 1)


def _scan_jpeg_exif_date(f):
    """Search the first 64 KB of f for the EXIF header and parse what follows."""
    f.seek(0)
    head = f.read(65536)
    exif_pos = head.find(_EXIF_HEADER)
    if exif_pos > 0:
        return _parse_exif_data(head[exif_pos + len(_EXIF_HEADER):])
    return None


def _read_heic_exif_date(f):
    """Look for an EXIF payload in the first 2 MB of a HEIC file.

    Every occurrence of each known marker is tried, because the marker text
    can also appear in places that are not followed by a TIFF structure.
    Falls back to scanning the same bytes for a plain timestamp string.
    """
    f.seek(0)
    data = f.read(2 * 1024 * 1024)
    for marker in _HEIC_EXIF_MARKERS:
        start = data.find(marker)
        while start >= 0:
            result = _parse_exif_data(data[start + len(marker):])
            if result:
                return result
            start = data.find(marker, start + 1)
    return _scan_for_datetime_strings(data)


def _scan_for_datetime_strings(data):
    """Return the first valid "YYYY:MM:DD HH:MM:SS" timestamp found in data.

    Args:
        data: Raw bytes to search.

    Returns:
        The timestamp with a single space between date and time, or None.
    """
    for match in _RAW_DATETIME_RE.finditer(data):
        candidate = b' '.join(match.groups()).decode('ascii')
        if _is_exif_datetime(candidate):
            return candidate
    return None


def _extract_text_date(text):
    """Return the first timestamp in text, rewritten in EXIF layout, or None."""
    match = _TEXT_DATETIME_RE.search(text)
    if match is None:
        return None
    year, month, day, clock = match.groups()
    return f'{year}:{month}:{day} {clock}'


def _png_text_date(chunk_data):
    """Extract a date from a PNG tEXt chunk (keyword, NUL, Latin-1 text)."""
    keyword, separator, text = chunk_data.partition(b'\0')
    if not separator:
        return None
    if keyword.decode('latin-1').lower() not in _TEXT_DATE_KEYWORDS:
        return None
    return _extract_text_date(text.decode('latin-1'))


def _png_itxt_date(chunk_data):
    """Extract a date from an uncompressed PNG iTXt chunk.

    Layout: keyword, NUL, compression flag (1 byte), compression method
    (1 byte), language tag, NUL, translated keyword, NUL, UTF-8 text.
    Compressed text is not decoded and yields None.
    """
    keyword, separator, rest = chunk_data.partition(b'\0')
    if not separator or len(rest) < 2:
        return None
    if keyword.decode('latin-1').lower() not in _ITXT_DATE_KEYWORDS:
        return None
    compression_flag = rest[0]
    _language, separator, rest = rest[2:].partition(b'\0')
    if not separator:
        return None
    _translated, separator, text = rest.partition(b'\0')
    if not separator or compression_flag:
        return None
    return _extract_text_date(text.decode('utf-8', errors='ignore'))


def _read_png_exif_date(f):
    """Walk the chunks of a PNG file and return the first date found.

    An eXIf chunk ends the search with whatever the EXIF parser returns;
    iTXt and tEXt chunks are consulted for date-like keywords; every other
    chunk is skipped.
    """
    f.seek(8)  # Skip the PNG signature.
    try:
        while True:
            chunk_header = f.read(8)
            if len(chunk_header) != 8:
                return None
            length, chunk_type = struct.unpack('>I4s', chunk_header)
            if chunk_type == b'eXIf':
                return _parse_exif_data(f.read(length))
            if chunk_type == b'iTXt':
                date_str = _png_itxt_date(f.read(length))
            elif chunk_type == b'tEXt':
                date_str = _png_text_date(f.read(length))
            else:
                date_str = None
                f.seek(length, 1)
            if date_str:
                return date_str
            f.seek(4, 1)  # Skip the CRC that closes every chunk.
    except OSError:
        return None


def _parse_exif_data(exif_data):
    """Extract a capture date from a TIFF-structured EXIF payload.

    The Exif sub-IFD, IFD0 and IFD1 are searched tag by tag in the order of
    _DATE_TAG_PRIORITY, so DateTimeOriginal is preferred over the plain
    DateTime of IFD0. Values that are not valid EXIF timestamps are skipped.

    Args:
        exif_data: Bytes starting at the TIFF byte-order mark ("II"/"MM").

    Returns:
        The date string, or None when nothing usable is found.
    """
    if len(exif_data) < 8:
        return None
    byte_order = exif_data[:2]
    if byte_order == b'II':
        endian = '<'  # Little endian
    elif byte_order == b'MM':
        endian = '>'  # Big endian
    else:
        return None

    try:
        ifd0_offset = struct.unpack_from(endian + 'I', exif_data, 4)[0]
        if ifd0_offset >= len(exif_data):
            return None

        ifd_offsets = []
        exif_ifd_offset = _find_exif_ifd(exif_data, ifd0_offset, endian)
        if exif_ifd_offset and exif_ifd_offset < len(exif_data):
            ifd_offsets.append(exif_ifd_offset)
        ifd_offsets.append(ifd0_offset)
        ifd1_offset = _get_next_ifd(exif_data, ifd0_offset, endian)
        if ifd1_offset and ifd1_offset < len(exif_data):
            ifd_offsets.append(ifd1_offset)

        for tag, tag_name in _DATE_TAG_PRIORITY:
            for offset in ifd_offsets:
                value = _read_ifd_dates(
                    exif_data, offset, endian, {tag: tag_name}
                )
                if value and _is_exif_datetime(value):
                    return value
    except struct.error:
        return None
    return None


def _iter_ifd_entries(exif_data, ifd_offset, endian):
    """Yield (tag, type, count, value_offset) for each 12-byte IFD entry.

    Iteration stops silently at the first entry that would run past the end
    of exif_data.
    """
    if ifd_offset + 2 >= len(exif_data):
        return
    entry_count = struct.unpack_from(endian + 'H', exif_data, ifd_offset)[0]
    for index in range(entry_count):
        entry_offset = ifd_offset + 2 + index * 12
        if entry_offset + 12 > len(exif_data):
            return
        yield struct.unpack_from(endian + 'HHII', exif_data, entry_offset)


def _read_ifd_dates(exif_data, ifd_offset, endian, date_tags):
    """Return the first date-like value of the given tags in one IFD.

    Args:
        exif_data: TIFF-structured EXIF payload.
        ifd_offset: Offset of the IFD inside exif_data.
        endian: struct byte-order prefix, '<' or '>'.
        date_tags: Mapping whose keys are the tag numbers to accept.

    Returns:
        A string of at least 19 characters containing ':', or None.
    """
    try:
        for tag, tag_type, count, value_offset in _iter_ifd_entries(
            exif_data, ifd_offset, endian
        ):
            if tag not in date_tags or count <= 4:
                # Values of four bytes or fewer are stored inline and are
                # too short to hold a timestamp.
                continue
            if tag_type == 2:  # ASCII
                if value_offset + count > len(exif_data):
                    continue
                raw = exif_data[value_offset:value_offset + count - 1]
                try:
                    date_str = raw.decode('ascii').rstrip('\x00')
                except UnicodeDecodeError:
                    continue
            elif tag_type in (1, 3, 4, 5):  # BYTE, SHORT, LONG, RATIONAL
                if value_offset + count * 4 > len(exif_data):
                    continue
                raw = exif_data[value_offset:value_offset + min(count * 4, 20)]
                date_str = raw.decode('ascii', errors='ignore').rstrip('\x00')
            else:
                continue
            if len(date_str) >= 19 and ':' in date_str:
                return date_str
    except struct.error:
        return None
    return None


def _get_next_ifd(exif_data, ifd_offset, endian):
    """Return the offset of the IFD chained after the one at ifd_offset.

    Returns None when the link is zero or lies outside exif_data.
    """
    if ifd_offset + 2 >= len(exif_data):
        return None
    entry_count = struct.unpack_from(endian + 'H', exif_data, ifd_offset)[0]
    pointer_pos = ifd_offset + 2 + entry_count * 12
    if pointer_pos + 4 > len(exif_data):
        return None
    next_ifd_offset = struct.unpack_from(endian + 'I', exif_data, pointer_pos)[0]
    return next_ifd_offset or None


def _find_exif_ifd(exif_data, ifd_offset, endian):
    """Return the Exif sub-IFD offset stored in the IFD, or None if absent."""
    try:
        for tag, _tag_type, _count, value_offset in _iter_ifd_entries(
            exif_data, ifd_offset, endian
        ):
            if tag == _EXIF_IFD_POINTER_TAG:
                return value_offset
    except struct.error:
        return None
    return None


def copy_camera_roll(backup_path: Path, output_path: Path):
    """Copy every CameraRollDomain file of a backup into output_path.

    Args:
        backup_path: Backup folder containing Manifest.db.
        output_path: Folder under which the relative paths are recreated.

    Raises:
        FileNotFoundError: If Manifest.db is missing from backup_path.
    """
    manifest_db = backup_path / "Manifest.db"
    if not manifest_db.exists():
        raise FileNotFoundError(f"Manifest.db not found in {backup_path}")

    # closing() guarantees the connection is released even if the query fails.
    with closing(sqlite3.connect(manifest_db)) as conn:
        rows = conn.execute(
            """
            SELECT fileID, relativePath
            FROM Files
            WHERE domain = 'CameraRollDomain'
            """
        ).fetchall()

    print(f"Found {len(rows)} CameraRollDomain files")

    output_root = output_path.resolve()
    for file_id, relative_path in rows:
        # The backup stores each file in a sub-folder named after the first
        # two characters of its fileID.
        src = backup_path / file_id[:2] / file_id
        if not src.exists():
            print(f"⚠️ Missing file: {src}")
            continue

        dest = output_path / relative_path
        resolved_dest = dest.resolve()
        if resolved_dest != output_root and output_root not in resolved_dest.parents:
            # The manifest is external data: never write outside output_path.
            print(f"⚠️ Skipped (unsafe path): {relative_path}")
            continue

        dest.parent.mkdir(parents=True, exist_ok=True)
        if dest.exists():
            print(f"⏩ Skipped (already exists): {relative_path}")
        else:
            shutil.copy2(src, dest)
            print(f"✅ Copied {relative_path}")

    print("🎉 Backup extraction completed.")


def _candidate_folders(root, preferred_name):
    """Yield the sub-folders of root, the one named preferred_name first."""
    if not root.exists():
        return
    preferred = root / preferred_name
    if preferred.is_dir():
        yield preferred
    for folder in root.iterdir():
        if folder != preferred and folder.is_dir():
            yield folder


def find_display_file(original_file, metadata_dcim, thumbnails_dcim):
    """Find the best display file (metadata JPG or thumbnail) for an original.

    The folder carrying the same name as the original's own folder is tried
    first, then the remaining folders.

    Args:
        original_file: Path of the original image or video.
        metadata_dcim: Folder whose sub-folders may hold "<stem>.JPG".
        thumbnails_dcim: Folder whose sub-folders may hold a directory named
            after the original's full file name, containing JPG files.

    Returns:
        A (path, kind) tuple where kind is "metadata", "thumbnail" or
        "original" (the original itself when nothing else exists).
    """
    folder_name = original_file.parent.name

    for folder in _candidate_folders(metadata_dcim, folder_name):
        metadata_jpg = folder / f"{original_file.stem}.JPG"
        if metadata_jpg.exists():
            return metadata_jpg, "metadata"

    for dcim_folder in _candidate_folders(thumbnails_dcim, folder_name):
        image_dir = dcim_folder / original_file.name
        if image_dir.is_dir():
            thumbnail = next(image_dir.glob("*.JPG"), None)
            if thumbnail is not None:
                return thumbnail, "thumbnail"

    return original_file, "original"


def get_all_original_files(original_dcim):
    """Return every image/video file found directly inside the DCIM folders.

    Only the upper-case extensions in _ORIGINAL_EXTENSIONS are matched.
    """
    return [
        file_path
        for folder in original_dcim.iterdir()
        if folder.is_dir()
        for ext in _ORIGINAL_EXTENSIONS
        for file_path in folder.glob(f"*{ext}")
    ]


def get_file_info(file_path):
    """Return size and date information for a file.

    The date is taken from EXIF data when available and parseable,
    otherwise from the file's modification time.

    Returns:
        A dict with the keys 'size' (bytes), 'date_taken'
        ("YYYY-MM-DD HH:MM:SS"), 'date_taken_obj' (datetime) and 'size_mb'
        (size in MiB rounded to two decimals).
    """
    stat = file_path.stat()

    date_taken_obj = None
    if file_path.suffix.lower() in _EXIF_SUFFIXES:
        exif_date = read_exif_date(file_path)
        if exif_date:
            try:
                date_taken_obj = datetime.strptime(exif_date, _EXIF_DATE_FORMAT)
            except ValueError:
                date_taken_obj = None

    if date_taken_obj is None:
        date_taken_obj = datetime.fromtimestamp(stat.st_mtime)

    return {
        'size': stat.st_size,
        'date_taken': date_taken_obj.strftime('%Y-%m-%d %H:%M:%S'),
        'date_taken_obj': date_taken_obj,
        'size_mb': round(stat.st_size / (1024 * 1024), 2),
    }


def generate_gallery(photos_root: Path):
    """Generate html_view/index.html for the photos extracted to photos_root.

    Prints progress and returns early (writing nothing) when the DCIM
    folder is missing or holds no matching files.
    """
    html_view = photos_root / "html_view"

    # Paths for the different file types.
    metadata_dcim = photos_root / "Media" / "PhotoData" / "Metadata" / "DCIM"
    thumbnails_dcim = (
        photos_root / "Media" / "PhotoData" / "Thumbnails" / "V2" / "DCIM"
    )
    original_dcim = photos_root / "Media" / "DCIM"

    if not original_dcim.exists():
        print(f"❌ Original DCIM folder not found: {original_dcim}")
        return

    print("📁 Looking for display files in:")
    print(f" Metadata: {'Found' if metadata_dcim.exists() else 'Not found'}")
    print(f" Thumbnails: {'Found' if thumbnails_dcim.exists() else 'Not found'}")

    original_files = get_all_original_files(original_dcim)
    print(f"Found {len(original_files)} original files")

    if not original_files:
        print("❌ No images found to generate gallery")
        return

    images = []
    display_counts = defaultdict(int)

    for original_file in original_files:
        display_file, display_type = find_display_file(
            original_file, metadata_dcim, thumbnails_dcim
        )
        display_counts[display_type] += 1

        original_info = get_file_info(original_file)
        if display_file != original_file:
            display_info = get_file_info(display_file)
        else:
            display_info = original_info

        images.append({
            'name': original_file.stem,
            'display_path': str(display_file.relative_to(photos_root)),
            'original_path': str(original_file.relative_to(photos_root)),
            'folder': original_file.parent.name,
            'display_info': display_info,
            'original_info': original_info,
            'original_ext': original_file.suffix.upper(),
            'display_type': display_type,
            'display_ext': display_file.suffix.upper(),
        })

    print(f"📊 Total original files: {len(original_files)}")
    print(f"📊 Using metadata for display: {display_counts['metadata']}")
    print(f"📊 Using thumbnails for display: {display_counts['thumbnail']}")
    print(f"📊 Using original for display: {display_counts['original']}")
    print(f"📊 Images to display: {len(images)}")

    # Newest first, ties broken by name (also descending).
    images.sort(
        key=lambda img: (img['original_info']['date_taken_obj'], img['name']),
        reverse=True,
    )

    grouped_images = defaultdict(list)
    for img in images:
        date_key = img['original_info']['date_taken_obj'].strftime('%Y-%m-%d')
        grouped_images[date_key].append(img)

    html_content = generate_html_content(images, grouped_images)

    html_view.mkdir(exist_ok=True)
    output_file = html_view / "index.html"
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"✅ Gallery generated: {output_file}")
    print(f"📊 {len(images)} images included")
    print(f"🌐 Open {output_file} in your browser to view the gallery")


def generate_html_content(images, grouped_images):
    """Generate the page content for the gallery.

    Args:
        images: List of image dicts; the 'folder' key of each is read.
        grouped_images: Images grouped by date. Accepted for interface
            compatibility.

    Returns:
        The page text with the photo and folder counts filled in.
    """
    # NOTE(review): the template below contains no markup and does not use
    # grouped_images or generate_gallery_sections - confirm whether the
    # per-date sections are meant to be embedded here.
    folder_count = len({img['folder'] for img in images})
    return f""" iPhone Gallery Archive

📸 iPhone Camera Roll Gallery

Extracted from iPhone backup

{len(images)} photos • {folder_count} folders
"""


def generate_gallery_sections(grouped_images):
    """Generate the per-date section text, newest date first.

    Args:
        grouped_images: Mapping of "YYYY-MM-DD" keys to lists of images.

    Returns:
        One block per date holding the date as "DD.MM.YYYY" and the number
        of photos; an empty string when there are no groups.
    """
    sections = []
    for date_key in sorted(grouped_images, reverse=True):
        date_display = datetime.strptime(date_key, '%Y-%m-%d').strftime('%d.%m.%Y')
        image_count = len(grouped_images[date_key])
        noun = 'photo' if image_count == 1 else 'photos'
        sections.append(f"\n{date_display}\n{image_count} {noun}\n")
    return "".join(sections)


def main():
    """Parse the command line, extract the camera roll and optionally build the gallery."""
    parser = argparse.ArgumentParser(
        description="Extract Camera Roll from iPhone backup and optionally generate HTML gallery"
    )
    parser.add_argument(
        "--backup-path",
        required=True,
        type=Path,
        help="Path to iPhone backup folder (with Manifest.db)",
    )
    parser.add_argument(
        "--output-path",
        required=True,
        type=Path,
        help="Path where Camera Roll should be restored",
    )
    parser.add_argument(
        "--generate-gallery",
        action="store_true",
        help="Generate HTML gallery after extraction",
    )
    args = parser.parse_args()

    copy_camera_roll(args.backup_path, args.output_path)

    if args.generate_gallery:
        print("\n🖼️ Generating HTML gallery...")
        generate_gallery(args.output_path)


if __name__ == "__main__":
    main()