#!/usr/bin/perl -w
#
# Zymonic Business Process and Information Management System
# Copyright Zednax Limited 2008 -
# For Authors and Changelog see the subversion history

# error handling
BEGIN
{
    use Zymonic::Utils qw(death_handler clean);
    $main::SIG{__DIE__} = \&death_handler;
}

# Modules
use Zymonic;
use Zymonic::Config;

$Zymonic::system  = '';
$Zymonic::session = '';

$Zymonic::system = clean( shift @ARGV, '_' ) || '';
my $table_name    = clean( shift @ARGV, '_' )  || '';
my $id_field      = clean( shift @ARGV, '_' )  || '';
my $key_fields_in = clean( shift @ARGV, '_,' ) || '';
my $oldest      = clean( shift @ARGV ) || '';
my $real_delete = clean( shift @ARGV ) || '';
my @key_fields = split( ',', $key_fields_in );

unless ( $Zymonic::system && $table_name && $id_field && $key_fields_in )
{
    print
      "Usage: remove_table_duplicates.pl [system] [table name] [id field] [fields] [oldest flag *] [real delete *]\n"
      . "\t *=optional\n"
      . "\t system - zymonic system to act on\n"
      . "\t table name - name of the table to remove duplicates from\n"
      . "\t id field - unique id field of each record in the table\n"
      . "\t fields - comma seperated list of fields in the table to use to identical duplicates\n"
      . "\t oldest flag - by default this will keep the most recent record, set this flag to keep the oldest\n"
      . "\t real delete - by default the duplicate record will just be marked as deleted, set this flag to actually remove them\n";
    exit(0);
}

$Zymonic::ZCONFIG{$Zymonic::system} = Zymonic::Config->new(
    system_name => $Zymonic::system,
    config_dir  => "/etc/zymonic",
    ip_address  => '127.0.0.1',
    protocol    => 'http'
);

my $db = $Zymonic::ZCONFIG{$Zymonic::system}->{DB};

# find duplicates, those with more than 1 record
my $field_list = join( ', ', @key_fields );
my $record_counts = $db->run_query(
    {
        string => "SELECT $field_list, COUNT(*) AS zz_count FROM $table_name GROUP BY $field_list HAVING COUNT(*) > 1",
        params => [],
    }
);

# for each found remove the duplicates
foreach my $record ( @{$record_counts} )
{
    print "Found $record->{zz_count} duplicates where: "
      . join( ', ', map { $_ . '=' . $record->{$_} } @key_fields ) . "\n";
    my $duplicated_records = $db->run_query(
        {
            string => "SELECT $id_field, $field_list FROM $table_name WHERE "
              . join( ' AND ', map { $_ . '= ?' } @key_fields )
              . ' ORDER BY zzlu '
              . ( $oldest ? 'ASC' : 'DESC' ),
            params => [ map { $record->{$_} } @key_fields ],
        }
    );

    # delete all but the first record (this will be oldest/newest from order in SQL)
    shift @{$duplicated_records};
    my @remaining_ids = map { $_->{$id_field} } @{$duplicated_records};
    my $batch_size = 200;
    while (@remaining_ids)
    {
        # do this in batches so we don't break DBI by having too many binding params
        my @ids = splice( @remaining_ids, 0, $batch_size );
        my $result;
        if ($real_delete)
        {
            $result = $db->run_statement(
                {
                    string => "DELETE FROM $table_name WHERE $id_field IN (" . join( ', ', map { '?' } @ids ) . ')',
                    params => [@ids],
                }
            );
        }
        else
        {
            $result = $db->run_statement(
                {
                    string => "UPDATE $table_name SET deleted = ? WHERE $id_field IN ("
                      . join( ', ', map { '?' } @ids ) . ')',
                    params => [ 'Y', @ids ],
                }
            );
        }

        print 'Deleting ' . ( $real_delete ? '(really!) ' : '' ) . ( scalar @ids ) . " records, result: $result\n";
    }
}