#!/usr/bin/perl
#
# mailDuplicity01.pl
# Reads the .eml files in a directory, computes a hash (MD5) of each one and detects duplicates
# 2023-10-05 23:15,

	use strict;
	use warnings;
	use feature 'say';
	use Data::Dumper;
	use Digest::MD5 qw(md5 md5_hex md5_base64);
	use File::Copy;
	use DateTime;
	
#	my $dir = "F:\\DanEmaily-smazat\\inboxBergamot\\Bergamot-d1d8\\Inbox";
	# Source directory that is scanned for .eml files.
	my $dir = "eml";
	# Destination directory that receives one copy of each unique message.
	# Earlier absolute Windows locations, kept for reference:
#	my $destDir = "F:\\DanEmaily-smazat\\inboxBergamot\\eml-uniq";
#	my $destDir = "H:\\TMP\\x\\eml-uniq";
	my $destDir = "eml-uniq";

	# Progress lines are accumulated in $logData and written to $LogFileName
	# at the end of the script.
	my $LogFileName = "log.txt";
	my $logData = "";

	# Collect the names (without directory part) of every entry whose name
	# ends in ".eml", case-insensitively. A lexical directory handle is used
	# so that it cannot clash with any other handle named DIR.
	opendir(my $dirHandle, $dir)
		or die "Couldn't open directory '$dir': $!";
	my @files = grep { /\.eml$/i } readdir $dirHandle;
	closedir $dirHandle;

	# Report the start time and the number of candidate files, both on
	# STDOUT and in the log buffer.
	my $dt = DateTime->now;
	print $dt->strftime( '%Y-%m-%d %H:%M:%S' );
	print "\n--------------------\nFiles: ", scalar @files, "\n--------------------\n";
	$logData .= "$dt\t";
	$logData .= "Files: ".(scalar @files)."\n--------------------\n";
	
#	print @files;
	
	# Build @emls: one array reference per message file, laid out as
	#   [0] file name, [1] modification time, [2] size, [3] MD5 hex digest.
	my @emls;
	foreach my $file (@files) {
		# "/" works as a path separator on Windows as well as on Unix,
		# whereas a literal backslash only works on Windows.
		my $path = "$dir/$file";
		open my $fh, '<', $path
			or die "Could not open $path for reading: $!";
		# Hash the raw bytes; without binmode, text-mode translation on
		# Windows would change what is fed to the digest.
		binmode $fh;
		my @fileProperty = stat($fh);		# element 9 is the modification time, element 7 the size
		# Stream the file into the digest instead of loading it whole into memory.
		my $digest = Digest::MD5->new->addfile($fh)->hexdigest;
		close $fh;
		push @emls, [ $file, $fileProperty[9], $fileProperty[7], $digest ];
	}
	print "\nSoubory do pole: ", scalar @emls, "\n--------------------\n";
	$dt = DateTime->now;
	$logData .= "$dt\t";
	$logData .= "Soubory do pole: ".(scalar @emls)."\n--------------------\n";
	
	# Order the records newest-first by modification time (field [1]), as the
	# progress message states. The previous code sorted on the MD5 digest
	# (field [3]) as a string, so the order had nothing to do with the date
	# and the copy kept for each set of duplicates was effectively arbitrary.
	# With this order, a first-occurrence-wins pass over @sorted keeps the
	# most recently modified copy of each message.
	my @sorted = sort { $b->[1] <=> $a->[1] } @emls;

	print "\nRazeni dle data DESC: ", scalar @sorted, "\n--------------------\n";
	$dt = DateTime->now;
	$logData .= "$dt\t";
	$logData .= "Razeni dle data DESC: ".(scalar @sorted)."\n--------------------\n";

# Walk @sorted in order and keep only the first record encountered for each
# MD5 digest (field [3]); any later record with the same digest is a duplicate
# and is dropped. %seen counts how many times each digest has occurred.
my %seen;
my @uniquearr;
for my $record (@sorted) {
	my $digest = $record->[3];
	next if $seen{$digest}++;
	push @uniquearr, $record;
}

# Report how many unique messages remain, on STDOUT and in the log buffer.
print "\nUnikatni: ", scalar @uniquearr, "\n--------------------\n";
	$dt = DateTime->now;
	$logData .= "$dt\t";
	$logData .= "Unikatni: ".(scalar @uniquearr)."\n--------------------\n";

# Copy every unique message from $dir into $destDir under its original name.
# The destination directory is created first if it does not exist yet
# (one level only; a missing parent directory is still an error).
if (!-d $destDir) {
	mkdir $destDir
		or die "Could not create directory '$destDir': $!";
}
for my $record (@uniquearr) {
	my $name = $record->[0];
	# "/" works as a path separator on Windows as well as on Unix.
	my $from = "$dir/$name";
	my $to   = "$destDir/$name";
	copy($from, $to)
		or die "copy of '$from' to '$to' failed: $!";
}

	# Record the end of the run in the log buffer.
	$dt = DateTime->now;
	$logData .= "$dt\t";
	$logData .= "Konec: \n--------------------\n";


	# Write the accumulated log text, replacing any previous log file.
	# Three-argument open with a lexical handle; open and close are both
	# checked so that a failure to write the log is not silently ignored
	# (buffered write errors only surface at close).
	open(my $logHandle, '>', $LogFileName)
		or die "Could not open $LogFileName for writing: $!";
	print {$logHandle} $logData;
	close($logHandle)
		or die "Could not close $LogFileName: $!";

	# Print the finish time on STDOUT (no trailing newline, as before).
	$dt = DateTime->now;
	print $dt->strftime( '%Y-%m-%d %H:%M:%S' );

	exit;