#! /usr/bin/perl
# Duplicate-file finder.
#
# Walks every directory listed in @RootDirs, groups the plain files it finds
# by byte size, and then compares the files inside each size group pairwise
# with File::Compare.  Every identical pair is printed to STDOUT; progress
# and summary messages go to STDERR.

use warnings;
use strict;
use threads;
use threads::shared;
use File::Find ();
use File::Compare ();

# Package globals (declared with "our" instead of the obsolete "use vars"):
#   @RootDirs - directories to scan
#   %Files    - file size in bytes => array ref of the paths with that size
our (@RootDirs, %Files);

# In a single-quoted string a backslash directly before the closing quote
# escapes that quote, so the trailing backslash has to be doubled.
@RootDirs = ('H:\test\\');

print STDERR "Reading Files and sorting them...\n";
my $filesCount = 0;
File::Find::find(sub {
    # Anything that is not a plain file (per -f) is ignored.
    return unless -f $_;

    # "_" reuses the stat data already fetched by the -f test above.
    push @{ $Files{-s _} }, $File::Find::name;
    if ($filesCount and $filesCount % 5000 == 0) {
        print STDERR "$filesCount files read\n";
    }
    $filesCount++;
}, @RootDirs);
print STDERR "$filesCount files read and sorted\n\n";

# Counters written by the worker thread and read by the progress loop below.
my $compareCount : shared = 0;    # files whose size group has been handled
my $doubleCount  : shared = 0;    # identical pairs found so far
my $finished     : shared = 0;    # set to 1 by CompareFiles when it is done

print STDERR "Comparing files...\n";
my $thread = threads->create(\&CompareFiles);

# Report progress every five seconds.  The is_running() check ends the loop
# even if the worker terminates without ever setting $finished.
while (!$finished and $thread->is_running()) {
    sleep(5);

    # Guard against a division by zero when the scan found no files at all.
    my $percent = $filesCount ? $compareCount * 100 / $filesCount : 100;
    printf STDERR "%2d%%\t%d/%d => %d\n",
        $percent, $compareCount, $filesCount, $doubleCount;
}
$thread->join();

# ------------------------------------------------------------
# CompareFiles
#
# Thread entry point.  For every size group in %Files, compares each pair of
# files in that group and prints the pairs whose contents are identical to
# STDOUT as two lines ("A:" and "B:") prefixed with the running pair number.
# Updates the shared counters $compareCount and $doubleCount as it goes,
# prints a summary line to STDERR and finally sets $finished to 1.
#
# Takes no arguments and returns nothing useful.
sub CompareFiles {
    foreach my $size (keys %Files) {
        my @items = @{ $Files{$size} };
        $compareCount += scalar @items;

        # A file with a unique size cannot have a duplicate.
        next if @items < 2;

        for my $i (0 .. $#items - 1) {
            for my $j ($i + 1 .. $#items) {
                my $cmp = File::Compare::compare($items[$i], $items[$j]);
                if ($cmp == 0) {
                    $doubleCount++;
                    print "$doubleCount\tA:\t$items[$i]\n";
                    print "$doubleCount\tB:\t$items[$j]\n\n";
                }
                elsif ($cmp < 0) {
                    # compare() signals an error with -1; report it rather
                    # than silently treating the pair as different.
                    warn "Could not compare '$items[$i]' and '$items[$j]': $!\n";
                }
            }
        }
    }
    print STDERR "Compared: $compareCount files: 2*$doubleCount Duplicates\n";
    $finished = 1;
    return;
} # CompareFiles
# ------------------------------------------------------------