Skip to content

Commit 6d35b33

Browse files
committed
Extract iCloud media using their proper filename
This change extracts iCloud media files under their original filename instead of the cryptic "cloudAsset.UUID" filename. It also adds the `--ignore-icloud-media` option to skip iCloud files altogether. This fixes #15.
1 parent b9ba11e commit 6d35b33

File tree

1 file changed

+148
-57
lines changed

1 file changed

+148
-57
lines changed

src/ios_backup_extractor.pl

Lines changed: 148 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@
5252
kError => 2,
5353
};
5454

55+
my $s_extension_regex = qr{\.$wanted_extensions$}i;
56+
5557
# define command line options
5658
my %cmd_options;
5759

@@ -68,6 +70,7 @@
6870
'list-long', # more detailed device backup listing than --list
6971
'prepend-date', # prepend date to each filename
7072
'prepend-date-separator=s', # '-' (default), '_', None
73+
'ignore-icloud-media', # skip files from iCloud
7174
'debug', # enable internal debug messages (warning: a huge stderr output)
7275
) or exit 1; # EXIT_FAILURE;
7376

@@ -116,6 +119,8 @@ sub printHelp
116119
- ‘dash’ (default)
117120
- ‘underscore’
118121
- ‘none’
122+
--ignore-icloud-media
123+
Do not extract media downloaded to the device from iCloud.
119124
-d, --dry Dry run, don't copy any files.
120125
-v, --verbose Show more information while running.
121126
-h, --help Display help.
@@ -380,6 +385,79 @@ ($all_devices_backup_hashref)
380385

381386
# ----------------------------------------------------------------
382387

388+
# Build a lookup table that maps an iCloud "cloudAsset.UUID" to the original
# filename (without extension) of the corresponding media file.
#
# Parameters:
#   $manifest_dbh - database handle; the `files` table is queried through its
#                   prepare/execute interface
#   $db_file      - name of the SQLite db file; used in error messages and
#                   handed to parseBPlist() as its second argument
#                   (presumably only an identifier for diagnostics - TODO confirm)
#
# Returns:
#   hashref { cloudAsset.UUID => original filename without extension }
#
# Dies when the SQL statement cannot be prepared or executed.
# Rows whose metadata cannot be parsed, or that lack either the original
# filename or the cloud asset UUID, are silently skipped.
sub mapICloudUuidToFilename ($manifest_dbh, $db_file)
{
    # non-interpolating heredoc: the SQL text is a constant
    my $sql = <<~'SQL_END';
        SELECT relativePath, file FROM files
        WHERE domain='CameraRollDomain'
          AND relativePath LIKE 'Media/PhotoData/CPLAssets/%'
        SQL_END

    my $sth = $manifest_dbh->prepare ($sql)
        or die qq{Error: 'prepare' method failed on ‘${db_file}’ SQLite db:\n},
               qq{\t$DBI::errstr.\n};

    $sth->execute()
        or die qq{Error: 'execute' method failed on ‘${db_file}’ SQLite db:\n},
               qq{\t$DBI::errstr.\n};

    my %icloud_uuid_filename_map;

    while (my $row = $sth->fetchrow_hashref)
    {
        # skip files with an unsuitable extension right away
        my $relative_path = $row->{relativePath};
        defined $relative_path && $relative_path =~ $s_extension_regex
            or next;

        # the `file` column must be a plain (non-reference) scalar blob
        my $file_blob = $row->{file};
        defined $file_blob && !ref ($file_blob)
            or next;

        my $bplist_obj = parseBPlist ($file_blob, $db_file);
        ref $bplist_obj eq 'HASH'
            or next;

        my $objects = $bplist_obj->{'$objects'};
        ref $objects eq 'ARRAY' && scalar (@{$objects}) > 4
            or next;

        # NOTE(review): element 3 of '$objects' is assumed to hold a nested
        # bplist with the extended attributes of the file - confirm against
        # the backup format.
        my $ext_blob = $objects->[3];
        defined $ext_blob && !ref ($ext_blob)
            or next;

        # Only a HASH result can be queried below; anything else used to slip
        # through the 'defined' check and die on the hash dereference.
        my $ext_bplist = parseBPlist ($ext_blob, $db_file);
        ref $ext_bplist eq 'HASH'
            or next;

        my $orig_filename = $ext_bplist->{'com.apple.assetsd.originalFilename'};
        defined $orig_filename && !ref ($orig_filename)
            or next;

        my $cloud_id = $ext_bplist->{'com.apple.assetsd.cloudAsset.UUID'};
        defined $cloud_id && !ref ($cloud_id)
            or next;

        # The name comes from backup data and later becomes part of an output
        # path: keep only its last path component.
        $orig_filename =~ s{\A.*/}{}s;
        # Remove the file extension
        $orig_filename =~ s/\.[^.]+$//;
        # nothing usable left (e.g. the name was just ".HEIC")
        length $orig_filename
            or next;

        # create dictionary of cloudAsset.UUID => filename
        $icloud_uuid_filename_map{$cloud_id} = $orig_filename;
    }

    return \%icloud_uuid_filename_map;
}
440+
441+
# ----------------------------------------------------------------
442+
443+
# Translate an iCloud UUID based filename into its original filename.
#
# Parameters:
#   $icloud_uuid_filename    - filename made of the cloud asset UUID, with or
#                              without a trailing ".ext"
#   $icloud_uid_filename_ref - hashref { cloudAsset.UUID => original filename }
#
# Returns the mapped original filename with the extension of the input
# re-attached, or the input unchanged when the UUID has no entry in the map.
sub replaceICloudFilename ($icloud_uuid_filename, $icloud_uid_filename_ref)
{
    # without a recognizable extension the whole name is the lookup key
    my $lookup_key = $icloud_uuid_filename;
    my $suffix     = q{};

    if ($icloud_uuid_filename =~ /^(.*?)(\.[^.]+)$/)
    {
        ($lookup_key, $suffix) = ($1, $2);
    }

    my $original_name = $icloud_uid_filename_ref->{$lookup_key};

    return defined $original_name
        ? $original_name . $suffix
        : $icloud_uuid_filename;
}
458+
459+
# ----------------------------------------------------------------
460+
383461
sub extractMediaFiles
384462
{
385463
say STDERR q{Info: "--dry" mode enabled. No files will be copied.}
@@ -418,6 +496,13 @@ sub extractMediaFiles
418496
})
419497
or die "Error: Cannot open ‘$tmp_manifest_db’ as SQLite db: $DBI::errstr.\n";
420498
499+
# create the list of original iCloud filenames
500+
my $icloud_uid_filename_ref = mapICloudUuidToFilename ($dbh, $tmp_manifest_db)
501+
unless $cmd_options{'ignore-icloud-media'};
502+
# DEBUG
503+
say STDERR Data::Dumper::Dumper ($icloud_uid_filename_ref);
504+
505+
# list all media files
421506
my $eval_ok = eval {
422507
my $sql = <<~SQL_END;
423508
SELECT fileID, relativePath, file FROM files
@@ -437,52 +522,56 @@ sub extractMediaFiles
437522
my $media_location = q{};
438523
my $media_subdir = q{};
439524
440-
# filter all full-size JPG, HEIC... images
525+
# precompiled regexes
526+
# [DCIM]
527+
# * Media/DCIM/101APPLE/IMG_1111.JPG
528+
# * Media/PhotoData/Mutations/DCIM/101APPLE/IMG_1111/Adjustments/FullSizeRender.jpg
529+
my $dcim_regex = qr{^
530+
(?<media_location>
531+
.+
532+
/DCIM/)
533+
(?<media_subdir>
534+
\d+APPLE/)
535+
(?<filename>
536+
[^./]+)
537+
(?:\.|/)
538+
.*
539+
(?<extension>
540+
(?i:$wanted_extensions))
541+
$
542+
}x;
543+
# [iCloud]
544+
# * Media/PhotoData/CPLAssets/group101/1A1A1A1A-1A1A-1A1A-1A1A-1A1A1A1A1A1A.HEIC
545+
# * Media/PhotoData/Mutations/PhotoData/CPLAssets/group101 \
546+
# /1A1A1A1A-1A1A-1A1A-1A1A-1A1A1A1A1A1A/Adjustments/FullSizeRender.jpg
547+
my $icld_regex = qr{^
548+
(?<media_location>
549+
.+
550+
/PhotoData/CPLAssets/)
551+
(?<media_subdir>
552+
group\d+/)
553+
(?<filename>
554+
[^./]+)
555+
(?:\.|/)
556+
.*
557+
(?<extension>
558+
(?i:$wanted_extensions))
559+
$
560+
}x;
561+
562+
# filter all full-size JPG, HEIC, MOV... media files
441563
while (my $row = $sth->fetchrow_hashref)
442564
{
443565
my $file_id = $row->{fileID};
444566
my $relative_path = $row->{relativePath};
445567
446568
next unless ( $relative_path !~ /thumb/i
447569
&& $relative_path !~ /metadata/i
448-
&& $relative_path =~ /\.$wanted_extensions$/i);
449-
450-
# determine filename, e.g.
451-
# [DCIM]
452-
# * Media/DCIM/101APPLE/IMG_1111.JPG
453-
# * Media/PhotoData/Mutations/DCIM/101APPLE/IMG_1111/Adjustments/FullSizeRender.jpg
454-
unless ($relative_path =~ m{^
455-
(?<media_location>
456-
.+
457-
/DCIM/)
458-
(?<media_subdir>
459-
\d+APPLE/)
460-
(?<filename>
461-
[^./]+)
462-
(?:\.|/)
463-
.*
464-
(?<extension>
465-
(?i:$wanted_extensions))
466-
$
467-
}x
468-
# [iCloud]
469-
# * Media/PhotoData/CPLAssets/group101/1A1A1A1A-1A1A-1A1A-1A1A-1A1A1A1A1A1A.HEIC
470-
# * Media/PhotoData/Mutations/PhotoData/CPLAssets/group101 \
471-
# /1A1A1A1A-1A1A-1A1A-1A1A-1A1A1A1A1A1A/Adjustments/FullSizeRender.jpg
472-
or $relative_path =~ m{^
473-
(?<media_location>
474-
.+
475-
/PhotoData/CPLAssets/)
476-
(?<media_subdir>
477-
group\d+/)
478-
(?<filename>
479-
[^./]+)
480-
(?:\.|/)
481-
.*
482-
(?<extension>
483-
(?i:$wanted_extensions))
484-
$
485-
}x)
570+
&& $relative_path =~ $s_extension_regex);
571+
572+
# filter and match predefined files, like full-size JPG, HEIC, MOV...
573+
unless ( $relative_path =~ $dcim_regex # [DCIM]
574+
or $relative_path =~ $icld_regex) # [iCloud]
486575
{
487576
warn qq{Warning: Cannot determine filename from "$relative_path"\n}
488577
. qq{\tfileID: $file_id\n}
@@ -491,6 +580,9 @@ sub extractMediaFiles
491580
next;
492581
}
493582
583+
# save the filename and extension from the regex match
584+
my ($re_filename, $re_extension) = ($+{filename}, $+{extension});
585+
494586
if ($media_location ne $+{media_location} || $media_subdir ne $+{media_subdir})
495587
{
496588
($media_location, $media_subdir) = ($+{media_location}, $+{media_subdir});
@@ -499,8 +591,8 @@ sub extractMediaFiles
499591
500592
my $is_icloud_media = index ($media_location, 'CPLAssets') != -1;
501593
502-
# save the filename and extension from the regex match
503-
my ($re_filename, $re_extension) = ($+{filename}, $+{extension});
594+
# skip media from iCloud if requested
595+
next if ($is_icloud_media && $cmd_options{'ignore-icloud-media'});
504596
505597
# find the file in the blob storage
506598
my $subdir = $file_id =~ s/^(\w\w).+$/$1/r
@@ -511,17 +603,18 @@ sub extractMediaFiles
511603
# parse the bplist from the SQLite database for this entry
512604
my $bplist_obj = parseBPlist ($row->{file}, $file_id);
513605
514-
# DEBUG
515-
#my $obj_dump = Data::Dumper::Dumper ($bplist_obj);
516-
#$obj_dump =~ s/[^\x20-\x7E\x0A]/{β}/g; # Replaces non-printable characters
517-
#say ($obj_dump);
606+
# replace iCloud UUID filename with an original filename
607+
my $orig_filename = $is_icloud_media
608+
? replaceICloudFilename ($re_filename, $icloud_uid_filename_ref)
609+
: $re_filename;
518610
519611
# add '_DELETED' flag to files marked as deleted
520-
my $deleted_flag = ($cmd_options{'add-trash'} && $g_deleted_files{$relative_path})
521-
? '_DELETED'
522-
: q{};
612+
my $file_is_deleted = $g_deleted_files{$relative_path};
613+
my $deleted_suffix = ($cmd_options{'add-trash'} && $file_is_deleted)
614+
? '_DELETED'
615+
: q{};
523616
524-
my $filename = $re_filename . $deleted_flag . q{.} . lc $re_extension;
617+
my $filename = $orig_filename . $deleted_suffix . q{.} . lc $re_extension;
525618
526619
# find the "LastModified" date for this file
527620
my $lastmodif_time_piece
@@ -532,10 +625,9 @@ sub extractMediaFiles
532625
# say STDERR "\tLastModified: ", $lastmodif_time_piece->strftime('%F %T');
533626
534627
# find the "Birth" date for this file
535-
my $birth_time_piece
536-
= defined $bplist_obj
537-
? getBirthTimeFromBPListObj ($bplist_obj, $file_id)
538-
: undef;
628+
my $birth_time_piece = defined $bplist_obj
629+
? getBirthTimeFromBPListObj ($bplist_obj, $file_id)
630+
: undef;
539631
540632
# say STDERR "\tBirth ", $birth_time_piece->strftime('%F %T');
541633
@@ -545,7 +637,7 @@ sub extractMediaFiles
545637
&& defined $lastmodif_time_piece
546638
&& olderThanSince ($lastmodif_time_piece, \@g_since_date));
547639
548-
# determine output directory baseod on LastModified date
640+
# determine output directory based on LastModified date
549641
my $date_sub_dir = getDateSubDir ($lastmodif_time_piece);
550642
my $out_sub_dir = $g_out_dir;
551643
$out_sub_dir .= "/$date_sub_dir" if $date_sub_dir ne q{};
@@ -559,8 +651,7 @@ sub extractMediaFiles
559651
$out_sub_dir,
560652
$lastmodif_time_piece);
561653
562-
if ( not($cmd_options{'add-trash'})
563-
&& $g_deleted_files{$relative_path})
654+
if (not($cmd_options{'add-trash'}) && $file_is_deleted)
564655
{
565656
printf "%3d. ($subdir/$file_id) %-13s → <IN_TRASH>, Skipping...\n",
566657
$file_index, $filename;
@@ -1252,7 +1343,7 @@ ($dbh, $table_name, $db_filename)
12521343
sub createDeletedFileList ($tmp_fh)
12531344
{
12541345
# Deleted media info is located in:
1255-
# SQLite: 12/12b144c0bd44f2b3dffd9186d3f9c05b917cee25
1346+
# SQLite: 12/12b144c0bd44f2b3dffd9186d3f9c05b917cee25 (Media/PhotoData/Photos.sqlite)
12561347
# Table: ZASSET
12571348
# Column: Z_PK (Primary Key)
12581349
# Column: ZTRASHEDSTATE (1 if deleted)

0 commit comments

Comments
 (0)