Generated: Sun Apr 15 11:45:54 2012 from chkdupes.pl 2011/11/19 11.1 KB.
#!/perl -w
# NAME: chkdupes.pl
# AIM: Read a folder, and subfolders, and check for any duplicate file names
# This is so they can all be put in one folder, if possible
# 19/11/2011 - Allow first item of two to be a single file
# 18/11/2011 - Fix bug if two folder given - src and dest
# 15/08/2011 - Update...
# 22/07/2008 geoff mclane http://geoffair.net/mperl
#
# Usage: chkdupes.pl [options] in-folder/in-file [in-folder2]
#   ONE input : the folder tree is scanned and files sharing the same
#               (lower-cased) name are reported.
#   TWO inputs: every file of the first item (file or folder tree) is looked
#               up, by lower-cased name, in the second folder tree.
use strict;
use warnings;
use File::stat;
use File::Basename; # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use Cwd;
my $perl_dir = 'C:\GTools\perl';
unshift(@INC, $perl_dir);
# lib_utils.pl supplies prt(), open_log(), close_log(), get_nn() and
# lu_get_YYYYMMDD_hhmmss() used below.
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' ...\n";
# log file stuff
our ($LF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    # keep only the last path component of the script name
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $perl_dir."\\temp.$pgmname.txt";
open_log($outfile);

# user variables
my $VERS = "0.0.3 2011-11-19";
my $load_log = 0;
my $in_file = '';
my $show_ext = 0;
my $max_name_wid = 65;   # cap on the padded path column in the verbose report
my $debug_on = 0;
my $def_file = 'def_file';
my $def_folder = "C:\\Users\\Public\\SAVES\\peru\\My Pictures\\Carla"; # DEFAULT, if NO command input
my $in_folder = "";
my @folder_list = ();    # the one or two inputs given on the command line
my @file_list = ();      # single-folder mode: [full path, lc name, dup count, partner index, stat]
my %exthash = ();        # lower-cased extension => number of files seen
my @dup_list = ();
my $verbose = 0;

### program variables
my @warnings = ();
my $cwd = cwd();
my $os = $^O;

### forward
sub process_folder($);
sub scan_dir($$$);

# Verbosity predicates.
sub VERB1() { return ($verbose >= 1); }
sub VERB2() { return ($verbose >= 2); }
sub VERB5() { return ($verbose >= 5); }
sub VERB9() { return ($verbose >= 9); }

# Print every message collected by prtw(), if any.
# $val is the pending exit value; it is accepted but not used here.
sub show_warnings($) {
    my ($val) = @_;
    if (@warnings) {
        prt( "\nGot ".scalar(@warnings)." WARNINGS...\n" );
        foreach my $itm (@warnings) {
            prt("$itm\n");
        }
        prt("\n");
    }
}

# Print an optional final message (newline-terminated if needed), show the
# collected warnings, close the log and exit with $val.
sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if ($msg !~ /\n$/);
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}

# Print a warning now and remember it (without its trailing newline) for the
# summary shown by show_warnings().
sub prtw($) {
    my ($tx) = shift;
    $tx =~ s/\n$//;
    prt("$tx\n");
    push(@warnings,$tx);
}

# Recursively collect every file under $inf into @file_list and count the
# extensions in %exthash. A folder that can not be opened is reported and
# skipped; it does not abort the run.
sub process_folder($) {
    my ($inf) = shift;
    my $dh;
    if (!opendir($dh, $inf)) {
        prt( "ERROR: Can NOT open $inf ... $! ... \n" );
        return;
    }
    # Read everything and close before recursing, so only one handle is open
    # at a time however deep the tree is.
    my @files = readdir($dh);
    closedir($dh);
    # add a separator only when the folder does not already end with one
    my $base = $inf;
    $base .= "\\" if ($base !~ /(\\|\/)$/);
    my @subdirs = ();
    foreach my $file (@files) {
        next if (($file eq '.')||($file eq '..'));
        my $ff = $base.$file;
        if (-d $ff) {
            push(@subdirs,$ff);
            next;
        }
        my ($nm,$dir,$ext) = fileparse( $ff, qr/\.[^.]*/ );
        $ext = lc($ext);
        my $name = lc($nm).$ext;
        my $sb = stat($ff);   # File::stat object, or undef if stat failed
        #                   0    1      2  3  4
        push( @file_list, [$ff, $name, 0, 0, $sb] );
        $exthash{$ext}++;
    }
    foreach my $fil (@subdirs) {
        process_folder($fil);
    }
}

# Build one line of the verbose duplicate report: the path left-justified to
# $wid columns, the size right-justified to 12, then the modification time.
# $sb may be a cached File::stat object; when undefined a fresh stat is tried
# and, failing that, '?' placeholders are shown instead of dying.
sub _dupe_report_line {
    my ($path,$sb,$wid) = @_;
    $sb = stat($path) if (!defined $sb);
    my ($size,$time) = ('?','?');
    if (defined $sb) {
        $size = get_nn($sb->size);
        $time = lu_get_YYYYMMDD_hhmmss($sb->mtime);
    }
    return sprintf("%-*s %12s %s\n", $wid, $path, $size, $time);
}

# Single-folder mode: scan $folder_list[0] and report files that share the
# same lower-cased name.
sub do_one_folder() {
    process_folder($folder_list[0]);
    my $incnt = scalar @file_list;
    prt( "Got $incnt file items to check ...\n" );

    # Group the list indexes by name; any group of two or more is a set of
    # duplicates. Indexes within a group are in ascending order.
    my %idx_by_name = ();
    for my $i (0 .. $incnt - 1) {
        push( @{ $idx_by_name{ $file_list[$i][1] } }, $i );
    }
    foreach my $ra (values %idx_by_name) {
        my @grp = @{$ra};
        next if (@grp < 2);
        my $last = $grp[-1];
        foreach my $i (@grp) {
            # slot 2: how many OTHER files carry this name (only ever tested > 0)
            $file_list[$i][2] = scalar(@grp) - 1;
            # slot 3: the partner shown in the report - the highest index of
            # the group, or the next highest for that entry itself
            $file_list[$i][3] = ($i == $last) ? $grp[-2] : $last;
        }
    }

    my @dupes = grep { $file_list[$_][2] > 0 } (0 .. $incnt - 1);
    my $dup_cnt = scalar @dupes;
    prt( "Got $dup_cnt duplicate names...\n" );

    # Column width: the longest path among the duplicates and their partners,
    # capped at $max_name_wid.
    my $min = 0;
    foreach my $i (@dupes) {
        foreach my $path ($file_list[$i][0], $file_list[ $file_list[$i][3] ][0]) {
            my $len = length($path);
            $min = $len if ($len > $min);
        }
    }
    $min = $max_name_wid if ($min > $max_name_wid);

    foreach my $i (@dupes) {
        if (VERB5()) {
            my $j = $file_list[$i][3];
            prt( _dupe_report_line($file_list[$i][0], $file_list[$i][4], $min) );
            # second line shows the partner's OWN size and time
            prt( _dupe_report_line($file_list[$j][0], $file_list[$j][4], $min) );
        } elsif (VERB1()) {
            prt( "del \"$file_list[$i][0]\"\n" );
        }
    }
    prt( "Done $dup_cnt duplicate names...\n" ) if (VERB1());
}

# Recursively collect every file under $inf into the array referenced by $ra
# as [file name, full path, lower-cased name, 0], counting extensions in
# %exthash. $lev is the recursion depth; the banner is printed at depth 0 only.
# Exits the program if $inf is empty or can not be opened.
sub scan_dir($$$) {
    my ($ra,$inf,$lev) = @_;
    pgm_exit(1,"ERROR: scan_dir: Passed null value!\n") if (length($inf) == 0);
    prt("Scanning [$inf]...\n") if ($lev == 0);
    my $dh;
    if (!opendir($dh, $inf)) {
        pgm_exit(1,"ERROR: Can NOT open [$inf] ... $! ... \n" );
    }
    my @files = readdir($dh);
    closedir($dh);
    $inf .= "\\" if ($inf !~ /(\\|\/)$/);
    my @subdirs = ();
    foreach my $file (@files) {
        next if (($file eq '.')||($file eq '..'));
        my $ff = $inf.$file;
        if (-d $ff) {
            push(@subdirs,$ff);
            next;
        }
        my ($nm,$dir,$ext) = fileparse( $ff, qr/\.[^.]*/ );
        $ext = lc($ext);
        my $name = lc($nm).$ext;
        #                0      1    2      3
        push( @{$ra}, [$file, $ff, $name, 0] );
        $exthash{$ext}++;
    }
    foreach my $fil (@subdirs) {
        scan_dir($ra,$fil,$lev+1);
    }
}

# For each entry of list 1 report whether a file with the same lower-cased
# name exists in list 2. Both arguments are references to arrays of
# [file name, full path, lower-cased name, 0] as built by scan_dir().
# Matches go through prtw() so they reappear in the final warning summary.
sub compare_lists($$) {
    my ($ra1,$ra2) = @_; # = \@arr1,\@arr2
    my $cnt1 = scalar @{$ra1};
    my $cnt2 = scalar @{$ra2};
    prt("Comparing list 1 = $cnt1, with list 2 = $cnt2...\n");

    # widest file name in list 1, used to align the messages
    my $min = 0;
    foreach my $item (@{$ra1}) {
        my $len = length($item->[0]);
        $min = $len if ($len > $min);
    }

    # one pass over list 2 gives a constant-time name lookup
    my %in_list2 = map { $_->[2] => 1 } @{$ra2};

    foreach my $item (@{$ra1}) {
        my $fil1 = sprintf("%-*s", $min, $item->[0]);
        if ($in_list2{ $item->[2] }) {
            prtw("File $fil1 is DUPLICATED in list 2!\n");
        } else {
            prt("File $fil1 NOT found in list 2!\n");
        }
    }
}

# List every extension seen and how often, in sorted order.
sub show_extensions() {
    my $cnt = scalar keys(%exthash);
    prt("Got list of $cnt entensions...\n");
    foreach my $key (sort keys %exthash) {
        prt( "Extension $key occurs ".$exthash{$key}." times ...\n" );
    }
    prt("Done list of $cnt entensions...\n");
}

###############################
### main
parse_args(@ARGV);
if (scalar @folder_list == 1) {
    do_one_folder(); # check folder for duplicate names???
} elsif (scalar @folder_list == 2) {
    my (@arr1,@arr2);
    my $f1 = $folder_list[0];
    my $f2 = $folder_list[1];
    if (-f $f1) {
        # a single file: build the one-entry list scan_dir() would have made
        #              0     1    2       3
        my ($nm,$dr) = fileparse($f1);
        push(@arr1, [$nm, $f1, lc($nm), 0]);
    } elsif (-d $f1) {
        scan_dir(\@arr1,$f1,0);
    } else {
        pgm_exit(1,"First item is neither file, nor folder [$f1]!\n");
    }
    scan_dir(\@arr2,$f2,0);
    compare_lists(\@arr1,\@arr2);
}
show_extensions() if ($show_ext);
pgm_exit(0,"");
################################

# Print the usage text.
sub give_help {
    prt("$pgmname: version $VERS\n");
    prt("Usage: $pgmname [options] in-folder/in-file [in-folder2]\n");
    prt("Options:\n");
    prt(" --help (-h or -?) = This help, and exit 0.\n");
    prt(" --load (-l) = Load log at end.\n");
    prt(" --show (-s) = Also show extension list.\n");
    prt(" --verb (-v) = Bump verbosity.\n");
    prt("If just ONE directory given, then it will be checked for duplicate files.\n");
    prt("If TWO folders given, they will be compared, and duplicate files reported.\n");
}

# Exit with an error unless at least one more argument follows $arg.
sub need_arg {
    my ($arg,@av) = @_;
    pgm_exit(1,"ERROR: [$arg] must have following argument!\n") if (!@av);
}

# Parse the command line: options start with one or more '-'; anything else
# must be an existing file or folder and is appended to @folder_list (two at
# most). Exits with an error on an unknown option, a missing path, a third
# path, or when no input was given at all.
sub parse_args {
    my (@av) = @_;
    while (@av) {
        my $arg = shift @av;
        if ($arg =~ /^-/) {
            (my $sarg = $arg) =~ s/^-+//;   # strip all leading dashes
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^l/) {
                $load_log = 1;
            } elsif ($sarg =~ /^s/) {
                $show_ext = 1;
            } elsif ($sarg =~ /^v/i) {
                if ($sarg =~ /^v(\d+)$/i) {
                    # -vN sets an absolute level
                    $verbose = $1;
                } else {
                    # -v, -vv, ... (either case) bump once per leading 'v'
                    $verbose++ while ($sarg =~ s/^v//i);
                }
                prt("Set verbosity to $verbose\n") if (VERB1());
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            if ((-d $in_file)||(-f $in_file)) {
                push(@folder_list,$in_file);
                my $cnt = scalar @folder_list;
                if ($cnt > 2) {
                    pgm_exit(1,"ERROR: Can only give two folders. folder [$in_file] is 3rd!\n");
                }
                prt("Set input $cnt to [$in_file]\n");
            } else {
                pgm_exit(1,"ERROR: Can NOT locate folder [$in_file]!\n");
            }
        }
    }
    if ((length($in_file) == 0) && $debug_on) {
        $in_file = $def_file;
    }
    if (length($in_file) == 0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
}

# eof - chkdupes.pl