Selaa lähdekoodia

Add photos archiver script and update .gitignore to include photos-archive directory

main
Andreas Demmelbauer 1 viikko sitten
vanhempi
commit
e21ecab2ec
2 muutettua tiedostoa jossa 874 lisäystä ja 0 poistoa
  1. +1
    -0
      .gitignore
  2. +873
    -0
      photos_archiver.py

+ 1
- 0
.gitignore Näytä tiedosto

@@ -1,5 +1,6 @@
export/
output/
photos-archive/
iphone-backup/
Messages/
*.sqlite

+ 873
- 0
photos_archiver.py Näytä tiedosto

@@ -0,0 +1,873 @@
#!/usr/bin/env python3

# Archives iPhone photos from a local unencrypted backup and generates an HTML gallery.

import argparse
import os
import shutil
import sqlite3
from pathlib import Path
from datetime import datetime
from collections import defaultdict
import struct

def read_exif_date(file_path):
    """Return the EXIF-style date stored in an image file, if one is found.

    The container format is detected from the first 12 bytes of the file and
    the open file object is handed to the matching reader.

    Args:
        file_path: Path of the image file to inspect.

    Returns:
        The date string produced by the format-specific reader (normally
        ``"YYYY:MM:DD HH:MM:SS"``), or ``None`` when the file cannot be
        opened, the format is not recognised, or no date is present.
    """
    try:
        with open(file_path, 'rb') as f:
            # The first bytes are enough to tell the supported containers apart.
            header = f.read(12)
            f.seek(0)
            if header.startswith((b'\xff\xd8', b'\xff\xe1')):
                # JPEG. Walk the marker segments first (this finds an Exif
                # APP1 segment wherever it sits), then fall back to a plain
                # byte scan of the start of the file. A malformed segment
                # table must not prevent the fallback from running.
                try:
                    found = _read_jpeg_exif_date(f)
                except (struct.error, IndexError):
                    found = None
                return found or _scan_jpeg_exif_date(f)
            if header[4:8] == b'ftyp':
                # ISO-BMFF style container (HEIC) - best-effort reader.
                return _read_heic_exif_date(f)
            if header.startswith(b'\x89PNG'):
                return _read_png_exif_date(f)
    except (OSError, struct.error, IndexError, ValueError):
        # Reading dates is best-effort: unreadable or malformed files simply
        # have no EXIF date.
        pass
    return None

def _read_jpeg_exif_date(f):
"""Read EXIF date from JPEG file."""
f.seek(0)
# Find EXIF segment
while True:
marker = f.read(2)
if not marker or marker[0] != 0xff:
break
if marker == b'\xff\xe1': # APP1 segment (EXIF)
length = struct.unpack('>H', f.read(2))[0]
exif_data = f.read(length - 2)
if exif_data.startswith(b'Exif\x00\x00'):
return _parse_exif_data(exif_data[6:])
else:
# Skip other segments
if marker[1] in [0xd8, 0xd9]: # SOI, EOI
continue
try:
length = struct.unpack('>H', f.read(2))[0]
f.seek(length - 2, 1)
except:
break
return None

def _scan_jpeg_exif_date(f):
"""Scan JPEG file for EXIF segment."""
f.seek(0)
data = f.read(65536) # Read first 64KB
# Look for EXIF marker
exif_pos = data.find(b'Exif\x00\x00')
if exif_pos > 0:
return _parse_exif_data(data[exif_pos + 6:])
return None

def _read_heic_exif_date(f):
    """Best-effort date extraction from a HEIC file.

    This does not parse the box structure. It reads the first 2 MB, looks for
    byte patterns that usually precede an EXIF/TIFF block and tries to parse
    what follows each hit. If none of the hits yields a date, the raw bytes
    are scanned for a literal date-time string.

    Args:
        f: Binary file object; it is rewound to offset 0.

    Returns:
        A date string, or ``None`` if nothing usable was found.
    """
    f.seek(0)
    data = f.read(2 * 1024 * 1024)  # metadata is expected near the start
    patterns = (b'Exif\x00\x00', b'Exif\x00\x01', b'EXIF\x00\x00')
    for pattern in patterns:
        hit = data.find(pattern)
        # Try every occurrence, not just the first: the same bytes can show
        # up before the real payload (presumably e.g. in item/type tables -
        # TODO confirm), and a failed first hit must not hide a later one.
        while hit != -1:
            start = hit + len(pattern)
            # _parse_exif_data only accepts data starting with a TIFF byte
            # order mark, so check that before copying a large slice.
            if data[start:start + 2] in (b'II', b'MM'):
                try:
                    result = _parse_exif_data(data[start:])
                except (struct.error, IndexError, ValueError):
                    result = None
                if result:
                    return result
            hit = data.find(pattern, hit + 1)
    # Alternative: look for datetime strings directly in the file.
    return _scan_for_datetime_strings(data)

def _scan_for_datetime_strings(data):
"""Scan binary data for datetime strings."""
import re
try:
# Convert to string for regex search, ignoring decode errors
text = data.decode('ascii', errors='ignore')
# Look for datetime patterns like "2024:08:15 14:30:45"
datetime_pattern = r'20\d{2}:\d{2}:\d{2}\s+\d{2}:\d{2}:\d{2}'
matches = re.findall(datetime_pattern, text)
if matches:
# Return the first valid datetime found
return matches[0]
except:
pass
return None

def _read_png_exif_date(f):
"""Read EXIF date from PNG file."""
f.seek(8) # Skip PNG signature
while True:
try:
# Read chunk length and type
length_data = f.read(4)
if len(length_data) != 4:
break
length = struct.unpack('>I', length_data)[0]
chunk_type = f.read(4)
if len(chunk_type) != 4:
break
if chunk_type == b'eXIf':
# PNG EXIF chunk - contains standard EXIF data
exif_data = f.read(length)
return _parse_exif_data(exif_data)
elif chunk_type == b'iTXt':
# International text chunk - might contain date
chunk_data = f.read(length)
try:
# iTXt format: keyword\0compression\0language\0translated_keyword\0text
parts = chunk_data.split(b'\0', 4)
if len(parts) >= 5:
keyword = parts[0].decode('latin-1', errors='ignore')
text = parts[4].decode('utf-8', errors='ignore')
# Look for date-related keywords
if keyword.lower() in ['date', 'datetime', 'creation time', 'date:create', 'exif:datetime']:
# Try to parse as datetime
import re
datetime_match = re.search(r'20\d{2}[:-]\d{2}[:-]\d{2}[\sT]\d{2}:\d{2}:\d{2}', text)
if datetime_match:
date_str = datetime_match.group()
# Convert to EXIF format
date_str = date_str.replace('-', ':').replace('T', ' ')
return date_str
except:
pass
elif chunk_type == b'tEXt':
# Text chunk - might contain date
chunk_data = f.read(length)
try:
# tEXt format: keyword\0text
null_pos = chunk_data.find(b'\0')
if null_pos > 0:
keyword = chunk_data[:null_pos].decode('latin-1', errors='ignore')
text = chunk_data[null_pos+1:].decode('latin-1', errors='ignore')
if keyword.lower() in ['date', 'creation time', 'timestamp']:
import re
datetime_match = re.search(r'20\d{2}[:-]\d{2}[:-]\d{2}[\sT]\d{2}:\d{2}:\d{2}', text)
if datetime_match:
date_str = datetime_match.group()
date_str = date_str.replace('-', ':').replace('T', ' ')
return date_str
except:
pass
else:
# Skip other chunk types
f.seek(length, 1)
# Skip CRC
f.seek(4, 1)
except (struct.error, OSError):
break
return None

def _parse_exif_data(exif_data):
"""Parse EXIF data to extract date tags."""
if len(exif_data) < 8:
return None
try:
# Check byte order
if exif_data[:2] == b'II':
endian = '<' # Little endian
elif exif_data[:2] == b'MM':
endian = '>' # Big endian
else:
return None
# Get IFD offset
ifd_offset = struct.unpack(endian + 'I', exif_data[4:8])[0]
if ifd_offset >= len(exif_data):
return None
# Read IFD entries
date_tags = {
0x9003: 'DateTimeOriginal', # EXIF DateTimeOriginal
0x0132: 'DateTime', # Image DateTime
0x9004: 'DateTimeDigitized', # EXIF DateTimeDigitized
0x0306: 'DateTime', # Additional DateTime tag
}
# Try to find date in IFD0
date_value = _read_ifd_dates(exif_data, ifd_offset, endian, date_tags)
if date_value:
return date_value
# Try EXIF sub-IFD if available
exif_ifd_offset = _find_exif_ifd(exif_data, ifd_offset, endian)
if exif_ifd_offset and exif_ifd_offset < len(exif_data):
date_value = _read_ifd_dates(exif_data, exif_ifd_offset, endian, date_tags)
if date_value:
return date_value
# Try IFD1 (thumbnail) if available
ifd1_offset = _get_next_ifd(exif_data, ifd_offset, endian)
if ifd1_offset and ifd1_offset < len(exif_data):
date_value = _read_ifd_dates(exif_data, ifd1_offset, endian, date_tags)
if date_value:
return date_value
except Exception:
pass
return None

def _read_ifd_dates(exif_data, ifd_offset, endian, date_tags):
"""Read date tags from IFD."""
try:
if ifd_offset + 2 >= len(exif_data):
return None
entry_count = struct.unpack(endian + 'H', exif_data[ifd_offset:ifd_offset + 2])[0]
for i in range(entry_count):
entry_offset = ifd_offset + 2 + (i * 12)
if entry_offset + 12 > len(exif_data):
break
tag, tag_type, count, value_offset = struct.unpack(
endian + 'HHII', exif_data[entry_offset:entry_offset + 12]
)
if tag in date_tags:
# Handle ASCII string (type 2)
if tag_type == 2:
if count <= 4:
# Value stored in value_offset field
value_data = struct.pack(endian + 'I', value_offset)[:count-1]
else:
# Value stored at offset
if value_offset + count <= len(exif_data):
value_data = exif_data[value_offset:value_offset + count - 1]
else:
continue
try:
date_str = value_data.decode('ascii')
if len(date_str) >= 19 and ':' in date_str: # "YYYY:MM:DD HH:MM:SS"
return date_str
except:
continue
# Handle other types that might contain date strings
elif tag_type in [1, 3, 4, 5]: # BYTE, SHORT, LONG, RATIONAL
try:
if count <= 4:
# Data stored inline
raw_data = struct.pack(endian + 'I', value_offset)
else:
# Data stored at offset
if value_offset + count * 4 <= len(exif_data):
raw_data = exif_data[value_offset:value_offset + min(count * 4, 20)]
else:
continue
# Try to decode as ASCII
try:
potential_date = raw_data.decode('ascii', errors='ignore').rstrip('\x00')
if len(potential_date) >= 19 and ':' in potential_date:
return potential_date
except:
pass
except:
continue
except Exception:
pass
return None

def _get_next_ifd(exif_data, ifd_offset, endian):
"""Get the offset of the next IFD."""
try:
if ifd_offset + 2 >= len(exif_data):
return None
entry_count = struct.unpack(endian + 'H', exif_data[ifd_offset:ifd_offset + 2])[0]
next_ifd_offset_pos = ifd_offset + 2 + (entry_count * 12)
if next_ifd_offset_pos + 4 <= len(exif_data):
next_ifd_offset = struct.unpack(endian + 'I', exif_data[next_ifd_offset_pos:next_ifd_offset_pos + 4])[0]
return next_ifd_offset if next_ifd_offset > 0 else None
except Exception:
pass
return None

def _find_exif_ifd(exif_data, ifd_offset, endian):
"""Find EXIF sub-IFD offset."""
try:
if ifd_offset + 2 >= len(exif_data):
return None
entry_count = struct.unpack(endian + 'H', exif_data[ifd_offset:ifd_offset + 2])[0]
for i in range(entry_count):
entry_offset = ifd_offset + 2 + (i * 12)
if entry_offset + 12 > len(exif_data):
break
tag, tag_type, count, value_offset = struct.unpack(
endian + 'HHII', exif_data[entry_offset:entry_offset + 12]
)
if tag == 0x8769: # EXIF IFD tag
return value_offset
except Exception:
pass
return None

def copy_camera_roll(backup_path: Path, output_path: Path):
    """Copy every CameraRollDomain file of an iPhone backup to ``output_path``.

    The backup's ``Manifest.db`` maps each file id to the file's relative path
    on the device. Files are copied to ``output_path / relativePath``; files
    that already exist at the destination are left untouched.

    Args:
        backup_path: Backup folder containing ``Manifest.db``.
        output_path: Folder that receives the restored directory tree.

    Raises:
        FileNotFoundError: If ``Manifest.db`` is missing from ``backup_path``.
    """
    manifest_db = backup_path / "Manifest.db"
    if not manifest_db.exists():
        raise FileNotFoundError(f"Manifest.db not found in {backup_path}")

    conn = sqlite3.connect(manifest_db)
    try:
        # Query all files from CameraRollDomain
        rows = conn.execute("""
            SELECT fileID, relativePath
            FROM Files
            WHERE domain = 'CameraRollDomain'
        """).fetchall()
    finally:
        # Close the database even if the query fails (e.g. missing table).
        conn.close()
    print(f"Found {len(rows)} CameraRollDomain files")

    output_root = output_path.resolve()
    for file_id, relative_path in rows:
        if not file_id or not relative_path:
            # Rows without an id or a path cannot be mapped to a file.
            continue
        # FileID is stored as 40-char hex. Backup stores it as <first 2>/<full>
        src = backup_path / file_id[:2] / file_id
        if not src.exists():
            print(f"⚠️ Missing file: {src}")
            continue

        dest = output_path / relative_path
        # The path comes from the manifest; never write outside output_path
        # (absolute paths or ".." components would otherwise escape it).
        try:
            dest.resolve().relative_to(output_root)
        except ValueError:
            print(f"⚠️ Skipping unsafe path: {relative_path}")
            continue
        dest.parent.mkdir(parents=True, exist_ok=True)

        if not dest.exists():
            shutil.copy2(src, dest)
            print(f"✅ Copied {relative_path}")
        else:
            print(f"⏩ Skipped (already exists): {relative_path}")

    print("🎉 Backup extraction completed.")


def find_display_file(original_file, metadata_dcim, thumbnails_dcim):
    """Pick the file the gallery should show for ``original_file``.

    Preference order:
      1. ``<metadata_dcim>/<any folder>/<stem>.JPG``      -> ``"metadata"``
      2. any ``*.JPG`` inside
         ``<thumbnails_dcim>/<any folder>/<full file name>/`` -> ``"thumbnail"``
      3. the original file itself                          -> ``"original"``

    Returns:
        A ``(path, kind)`` tuple where ``kind`` is one of the strings above.
    """
    metadata_name = f"{original_file.stem}.JPG"  # e.g. "IMG_1105.JPG"
    if metadata_dcim.exists():
        for subfolder in metadata_dcim.iterdir():
            if not subfolder.is_dir():
                continue
            candidate = subfolder / metadata_name
            if candidate.exists():
                return candidate, "metadata"

    if thumbnails_dcim.exists():
        for subfolder in thumbnails_dcim.iterdir():
            if not subfolder.is_dir():
                continue
            # Each image has its own directory named after the full filename.
            per_image_dir = subfolder / original_file.name
            if not per_image_dir.is_dir():
                continue
            # Take the first JPG found there (usually numbered like 5003.JPG).
            first_jpg = next(per_image_dir.glob("*.JPG"), None)
            if first_jpg is not None:
                return first_jpg, "thumbnail"

    return original_file, "original"

def get_all_original_files(original_dcim):
    """Collect the original image/video files below a DCIM directory.

    Every direct sub-folder of ``original_dcim`` is searched (not recursively)
    for files whose extension is HEIC, JPG, JPEG, PNG, MOV or MP4. The
    extension is compared case-insensitively, so ``IMG_1.JPG`` and
    ``img_1.jpg`` are both found regardless of the file system.

    Args:
        original_dcim: ``Path`` of the DCIM directory.

    Returns:
        List of file paths, ordered by folder and then by file path.
    """
    wanted_suffixes = {'.heic', '.jpg', '.jpeg', '.png', '.mov', '.mp4'}
    original_files = []
    # Sorting makes the result independent of directory listing order.
    for folder in sorted(original_dcim.iterdir()):
        if not folder.is_dir():
            continue
        original_files.extend(sorted(
            entry for entry in folder.iterdir()
            if entry.suffix.lower() in wanted_suffixes and entry.is_file()
        ))
    return original_files

def get_file_info(file_path):
    """Describe a file: its size and the best available "date taken".

    For ``.jpg``/``.jpeg``/``.heic``/``.png`` files the date comes from
    ``read_exif_date`` when that yields a parseable ``YYYY:MM:DD HH:MM:SS``
    string; otherwise the file's modification time is used.

    Returns:
        Dict with ``size`` (bytes), ``size_mb`` (rounded to 2 decimals),
        ``date_taken`` (``YYYY-MM-DD HH:MM:SS`` text) and ``date_taken_obj``
        (``datetime``).
    """
    display_format = '%Y-%m-%d %H:%M:%S'
    file_stat = file_path.stat()

    taken_at = None
    taken_text = None
    if file_path.suffix.lower() in ('.jpg', '.jpeg', '.heic', '.png'):
        try:
            raw_date = read_exif_date(file_path)
            if raw_date:
                taken_at = datetime.strptime(raw_date, '%Y:%m:%d %H:%M:%S')
                taken_text = taken_at.strftime(display_format)
        except Exception:
            # Unreadable or unparseable EXIF data is not an error here.
            pass

    if not taken_at:
        # No EXIF date: fall back to the file modification time.
        taken_at = datetime.fromtimestamp(file_stat.st_mtime)
        taken_text = taken_at.strftime(display_format)

    byte_size = file_stat.st_size
    return {
        'size': byte_size,
        'date_taken': taken_text,
        'date_taken_obj': taken_at,
        'size_mb': round(byte_size / (1024 * 1024), 2),
    }

def generate_gallery(photos_root: Path):
    """Generate ``html_view/index.html``, an image gallery for an archive.

    The originals are expected below ``<photos_root>/Media/DCIM``. For every
    original the best display file is chosen with ``find_display_file`` and
    the entries are sorted by date taken (newest first) and grouped per day.
    Nothing is written when the DCIM folder is missing or contains no media.

    Args:
        photos_root: Root folder of the extracted camera roll.
    """
    html_view = photos_root / "html_view"
    # Paths for different file types
    metadata_dcim = photos_root / "Media" / "PhotoData" / "Metadata" / "DCIM"
    thumbnails_dcim = photos_root / "Media" / "PhotoData" / "Thumbnails" / "V2" / "DCIM"
    original_dcim = photos_root / "Media" / "DCIM"
    if not original_dcim.exists():
        print(f"❌ Original DCIM folder not found: {original_dcim}")
        return
    print("📁 Looking for display files in:")
    print(f" Metadata: {'Found' if metadata_dcim.exists() else 'Not found'}")
    print(f" Thumbnails: {'Found' if thumbnails_dcim.exists() else 'Not found'}")

    original_files = get_all_original_files(original_dcim)
    print(f"Found {len(original_files)} original files")
    if not original_files:
        print("❌ No images found to generate gallery")
        return

    images = []
    display_counts = {"metadata": 0, "thumbnail": 0, "original": 0}
    for original_file in original_files:
        display_file, display_type = find_display_file(original_file, metadata_dcim, thumbnails_dcim)
        # Anything that is neither metadata nor thumbnail counts as original.
        counted_as = display_type if display_type in display_counts else "original"
        display_counts[counted_as] += 1

        original_info = get_file_info(original_file)
        display_info = get_file_info(display_file) if display_file != original_file else original_info
        images.append({
            'name': original_file.stem,
            # These paths end up in URLs, so always use forward slashes,
            # whatever the separator of the current OS is.
            'display_path': display_file.relative_to(photos_root).as_posix(),
            'original_path': original_file.relative_to(photos_root).as_posix(),
            'folder': original_file.parent.name,
            'display_info': display_info,
            'original_info': original_info,
            'original_ext': original_file.suffix.upper(),
            'display_type': display_type,
            'display_ext': display_file.suffix.upper()
        })
    print(f"📊 Total original files: {len(original_files)}")
    print(f"📊 Using metadata for display: {display_counts['metadata']}")
    print(f"📊 Using thumbnails for display: {display_counts['thumbnail']}")
    print(f"📊 Using original for display: {display_counts['original']}")
    print(f"📊 Images to display: {len(images)}")

    # Sort images by date taken (newest first), then by name
    images.sort(key=lambda x: (x['original_info']['date_taken_obj'], x['name']), reverse=True)
    # Group images by date
    grouped_images = defaultdict(list)
    for img in images:
        date_key = img['original_info']['date_taken_obj'].strftime('%Y-%m-%d')
        grouped_images[date_key].append(img)

    html_content = generate_html_content(images, grouped_images)

    html_view.mkdir(parents=True, exist_ok=True)
    output_file = html_view / "index.html"
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)
    print(f"✅ Gallery generated: {output_file}")
    print(f"📊 {len(images)} images included")
    print(f"🌐 Open {output_file} in your browser to view the gallery")

def generate_html_content(images, grouped_images):
    """Build the complete HTML document of the gallery page.

    The page consists of an inline stylesheet, a header showing the number of
    entries and of distinct ``folder`` values in ``images``, and the per-day
    sections rendered by ``generate_gallery_sections(grouped_images)``.

    Returns:
        The HTML document as a single string.
    """
    # Evaluate the dynamic parts up front so the template below only
    # interpolates plain names.
    photo_total = len(images)
    folder_total = len({img['folder'] for img in images})
    sections_markup = generate_gallery_sections(grouped_images)
    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>iPhone Gallery Archive</title>
<style>
* {{
margin: 0;
padding: 0;
box-sizing: border-box;
}}
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
background: #f5f5f5;
color: #333;
}}
.header {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 2rem;
text-align: center;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}}
.header h1 {{
font-size: 2.5rem;
margin-bottom: 0.5rem;
}}
.stats {{
margin-top: 1rem;
opacity: 0.9;
}}
.gallery-container {{
margin: 2rem auto;
padding: 0 1rem;
}}
.date-section {{
margin-bottom: 2rem;
}}
.date-header {{
color: #495057;
border-top: 1px solid #ddd;
padding: 1rem 0 0;
margin-bottom: 1rem;
font-size: 1rem;
}}
.date-subheader {{
font-size: 0.9rem;
opacity: 0.7;
margin-top: 0.25rem;
}}
.gallery {{
display: grid;
grid-template-columns: repeat(auto-fill, minmax(250px, 1fr));
gap: 1rem;
margin-bottom: 2rem;
}}
.image-card {{
overflow: hidden;
}}
.image-card .date-taken {{
margin-top: 0.5rem;
font-size: 0.85rem;
}}
.image-card .file-path {{
font-size: 0.75rem;
margin-bottom: 0.25rem;
word-break: break-all;
}}
.image-container {{
position: relative;
width: 100%;
overflow: hidden;
background: linear-gradient(90deg, #f0f0f0 25%, #e0e0e0 50%, #f0f0f0 75%);
background-size: 200% 100%;
animation: loading 1.5s infinite;
line-height: 0;
border-radius: 8px;
}}
@keyframes loading {{
0% {{
background-position: 200% 0;
}}
100% {{
background-position: -200% 0;
}}
}}
.image-container img {{
width: 100%;
height: auto;
object-fit: contain;
transition: transform 0.3s ease, opacity 0.3s ease;
background: transparent;
line-height: 0;
border-radius: 8px;
}}
.image-container img[loading="lazy"]:not([src]) {{
opacity: 0;
}}
.image-container img[loading="lazy"] {{
opacity: 1;
}}
.image-container:has(img[loading="lazy"][src]) {{
background: white;
animation: none;
}}
.image-card:hover .image-container img {{
transform: scale(1.02);
}}
.video-indicator {{
position: absolute;
top: 0.5rem;
right: 0.5rem;
background: rgba(0, 0, 0, 0.8);
color: white;
padding: 0.3rem 0.5rem;
border-radius: 4px;
font-size: 0.75rem;
font-weight: 600;
display: flex;
align-items: center;
gap: 0.2rem;
line-height: 1.2;
}}
.overlay {{
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
background: linear-gradient(to bottom, transparent 40%, rgba(0,0,0,0.9));
opacity: 0;
transition: opacity 0.3s ease;
display: flex;
flex-direction: column;
justify-content: flex-end;
padding: 1rem;
}}
.image-card:hover .overlay {{
opacity: 1;
}}
.overlay-content {{
color: white;
margin-bottom: 1rem;
}}
.overlay-content .filename {{
font-weight: 600;
margin-bottom: 0.5rem;
}}
.overlay-content .details {{
opacity: 0.9;
line-height: 1.4;
}}
.overlay-buttons {{
display: flex;
gap: 0.5rem;
}}
.btn {{
padding: 0.6rem 1rem;
border: none;
border-radius: 6px;
text-decoration: none;
text-align: center;
transition: all 0.3s ease;
cursor: pointer;
font-size: 0.9rem;
width: 100%;
line-height: 1.2;
}}
.btn-primary {{
background: rgba(255, 255, 255, 0.95);
color: #333;
}}
.btn-primary:hover {{
background: white;
box-shadow: 0 4px 12px rgba(0,0,0,0.2);
}}
.folder-badge {{
display: inline-block;
background: #e9ecef;
color: #495057;
padding: 0.2rem 0.5rem;
border-radius: 12px;
font-size: 0.8rem;
font-weight: 600;
}}
@media (max-width: 768px) {{
.gallery {{
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
gap: 0.75rem;
}}
.header h1 {{
font-size: 2rem;
}}
.overlay {{
padding: 0.75rem;
}}
}}
</style>
</head>
<body>
<div class="header">
<h1>📸 iPhone Camera Roll Gallery</h1>
<p>Extracted from iPhone backup</p>
<div class="stats">
{photo_total} photos • {folder_total} folders
</div>
</div>
<div class="gallery-container">
{sections_markup}
</div>
</body>
</html>"""

def generate_gallery_sections(grouped_images):
    """Render the per-day gallery sections as HTML.

    File names and paths come from the backup and may contain characters that
    are special in HTML or URLs (quotes, ``&``, ``<``, spaces, ``#``, ``?``).
    Text is therefore HTML-escaped and link targets are percent-encoded.

    Args:
        grouped_images: Mapping of ``"YYYY-MM-DD"`` keys to lists of image
            dicts. The keys read from each dict are ``name``,
            ``display_path``, ``original_path``, ``original_ext`` and
            ``original_info`` (with ``date_taken`` and ``size_mb``).

    Returns:
        The HTML for all sections, newest day first; ``""`` for no groups.
    """
    import html
    from urllib.parse import quote

    def text(value):
        """Escape a value for use in HTML text or a quoted attribute."""
        return html.escape(str(value), quote=True)

    def link(path):
        """Build a relative URL (one level up) for a path in the archive."""
        # Normalise OS-specific separators before percent-encoding.
        return text('../' + quote(str(path).replace(os.sep, '/')))

    # Collect fragments and join once instead of repeated string +=.
    fragments = []
    for date_key in sorted(grouped_images, reverse=True):
        day_images = grouped_images[date_key]
        date_display = datetime.strptime(date_key, '%Y-%m-%d').strftime('%d.%m.%Y')
        image_count = len(day_images)
        noun = 'photo' if image_count == 1 else 'photos'
        fragments.append(f"""
<div class="date-section">
<div class="date-header">
{date_display}
<div class="date-subheader">{image_count} {noun}</div>
</div>
<div class="gallery">
""")
        for img in day_images:
            # Videos get a badge so they can be told apart from photos.
            is_video = img['original_ext'].lower() in ('.mov', '.mp4')
            video_indicator = '<div class="video-indicator">🎬 VIDEO</div>' if is_video else ''
            fragments.append(f"""
<div class="image-card">
<div class="image-container">
<img src="{link(img['display_path'])}" alt="{text(img['name'])}" loading="lazy" decoding="async">
{video_indicator}
<div class="overlay">
<div class="overlay-content">
<div class="details">
<div class="file-path">{text(img['original_path'])}</div>""")
            # Add date taken if available
            date_taken = img['original_info']['date_taken']
            if date_taken:
                fragments.append(f"""
<div class="date-taken">{text(date_taken)}</div>""")
            fragments.append(f"""
</div>
</div>
<div class="overlay-buttons">
<a href="{link(img['original_path'])}" target="_blank" class="btn btn-primary">
Original ({text(img['original_info']['size_mb'])} MB)
</a>
</div>
</div>
</div>
</div>
""")
        fragments.append("""
</div>
</div>
""")
    return "".join(fragments)


def main():
    """Command-line entry point.

    Parses ``--backup-path``, ``--output-path`` (both required) and the
    optional ``--generate-gallery`` switch, extracts the camera roll and, if
    requested, builds the HTML gallery in the output folder.
    """
    cli = argparse.ArgumentParser(
        description="Extract Camera Roll from iPhone backup and optionally generate HTML gallery"
    )
    required_paths = (
        ("--backup-path", "Path to iPhone backup folder (with Manifest.db)"),
        ("--output-path", "Path where Camera Roll should be restored"),
    )
    for flag, help_text in required_paths:
        cli.add_argument(flag, required=True, type=Path, help=help_text)
    cli.add_argument(
        "--generate-gallery",
        action="store_true",
        help="Generate HTML gallery after extraction",
    )
    options = cli.parse_args()

    # Extraction always runs; the gallery is opt-in.
    copy_camera_roll(options.backup_path, options.output_path)
    if not options.generate_gallery:
        return
    print("\n🖼️ Generating HTML gallery...")
    generate_gallery(options.output_path)


if __name__ == "__main__":
    main()

Ladataan…
Peruuta
Tallenna