#!/usr/bin/perl

use strict;
use warnings;

use Chemistry::OpenSMILES qw( clean_chiral_centers );
use Chemistry::OpenSMILES::Stereo qw(
    chirality_to_pseudograph
    cis_trans_to_pseudoedges
    is_cis_trans_bond
    mark_all_double_bonds
);
use Chemistry::OpenSMILES::Parser;
use Chemistry::OpenSMILES::Writer qw( write_SMILES );
use Data::Dumper;
use File::Basename qw( basename );
use Getopt::Long::Descriptive;
use Graph 0.9723;
use Graph::Nauty qw( are_isomorphic canonical_order orbits );
use List::Util qw( any );
use SmilesScripts::DiffMessage qw(
    aggregate_messages
    message
    message_isomorphism
);

$Graph::Nauty::worksize = 12800;

my $basename = basename $0;
my( $opt, $usage ) = describe_options( <<"END" . 'OPTIONS',
USAGE
    $basename [<args>] [<files>]

DESCRIPTION
    $basename reads in files with three tab-separated columns:

    <ID> <SMILES_1> <SMILES_2>

    The program then attempts to detect isomorphism between <SMILES_1> and
    <SMILES_2> on each line. If immediate isomorphism is not detected,
    step-by-step simplifications are performed in order to lead both
    SMILES to isomorphism.

END
    [ 'check-isomorphism',
      'perform an extra isomorphism check on molecular entities detected ' .
      'as isomorphic by comparing their canonical representations' ],
    [],
    [ 'help', 'print usage message and exit', { shortcircuit => 1 } ],
);

if( $opt->help ) {
    print $usage->text;
    exit;
}

my $pair_moieties_options = {};
if( $opt->check_isomorphism ) {
    $pair_moieties_options->{check_isomorphism} = 1;
}

my $steps = [
    # Since removal of H atoms sometimes also dissolves chiral centers and
    # unsets cis/trans markers, removal of H atoms is performed later.
    { name => 'chirality',   func => \&remove_chirality },
    { name => 'cis/trans',   func => \&remove_bond_order,
                             args => { orders => [ '/', '\\' ] } },
    { name => 'charge',      func => \&remove_charge },
    { name => 'order',       func => \&remove_bond_order,
                             args => { orders => [ ':', '=', '#', '$' ] } },
    { name => 'aromaticity', func => \&remove_aromaticity },
    { name => 'H atoms',     func => \&remove_atoms,
                             args => [ 'H' ] },
    { name => 'atom types',  func => \&remove_atom_types },
];
my @checks = sort { my $aa = sprintf '%b', $a;
                    my $bb = sprintf '%b', $b;
                    ($a & (2**6))   <=> ($b & (2**6))   || # atom types is the last resort
                    ($a & (2**5))   <=> ($b & (2**5))   || # H atoms is quite destructive too
                    ($aa =~ s/1//g) <=> ($bb =~ s/1//g) || # least changes to the front
                     $a <=> $b }
             0..(2**@$steps)-1;

my $compositions_are_equal;

PAIR:
while( <> ) {
    next if /^#/;
    s/\n$//;

    my( $id, $first_smiles, $second_smiles ) = split "\t", $_;

    local $SIG{__WARN__} = sub {
        return if $_[0] =~ /can only process tetrahedral chiral centers/;
        print STDERR "$0: $ARGV($.) $id: $_[0]"
    };

    my( $first_moiety, $second_moiety );
    eval {
        $first_moiety  = parse_smiles( $first_smiles );
        $second_moiety = parse_smiles( $second_smiles );
    };
    if( $@ ) {
        print STDERR "$0: $ARGV($.) $id: $@";
        next;
    }

    my $nonmatching_single_moieties_achieved = 0;
    my @invariants;

    COMBINATION:
    for my $checks (@checks) {
        next if any { $checks & $_ } @invariants;

        # There is a problem with Graph 0.9723 causing loss of $.:
        # https://github.com/graphviz-perl/Graph/issues/26
        # Thus we have to go around it.
        my $line_no = $.;
        my $first_moiety_copy  = [ map { $_->deep_copy } @$first_moiety ];
        my $second_moiety_copy = [ map { $_->deep_copy } @$second_moiety ];
        $. = $line_no;

        my @modulo;
        for my $i (0..@$steps-1) {
            next unless $checks & (2**$i);
            # Do not perform the comparison unless at least one of the
            # molecules are touched by the simplification
            if( $steps->[$i]{func}( $first_moiety_copy,  $steps->[$i]{args} ) |
                $steps->[$i]{func}( $second_moiety_copy, $steps->[$i]{args} ) ) {
                 push @modulo, $steps->[$i]{name};
            } else {
                push @invariants, $checks;
                next COMBINATION;
            }
        }

        eval {
            pair_moieties( $first_moiety_copy,
                           $second_moiety_copy,
                           $pair_moieties_options );
        };
        if( $@ ) {
            print STDERR "$@"; # TODO better reporting here
            next;
        }

        # Checking for single nonmatching moieties
        $nonmatching_single_moieties_achieved |=
            scalar @$first_moiety_copy  == 1 &&
            scalar @$second_moiety_copy == 1;

        # There are still unmatched moieties on both sides, further
        # reductions are needed
        next if @$first_moiety_copy && @$second_moiety_copy;

        my $reason;
        if( @modulo ) {
            $reason = 'isomorphic modulo ' . join( ', ', sort @modulo );
        } else {
            $reason = 'isomorphic';
        }
        if( @$first_moiety_copy || @$second_moiety_copy ) {
            if( $reason eq 'isomorphic' ) {
                $reason =  'isomorphic modulo superfluous moieties';
            } else {
                $reason .= ', superfluous moieties';
            }
        }
        local $\ = "\n";
        print join "\t", $id, $first_smiles, $second_smiles, $reason;
        next PAIR;
    }

    my $reason = 'unknown';
    if( $nonmatching_single_moieties_achieved ) {
        $reason = 'nonmatching single moieties';
    }

    local $\ = "\n";
    print join "\t", $id, $first_smiles, $second_smiles, $reason;
    next;

    # TODO perform other checks
}

sub parse_smiles
{
    my ( $smiles ) = @_;
    my @smiles;
    eval {
        my $parser;
        $parser = Chemistry::OpenSMILES::Parser->new;
        @smiles = $parser->parse( $smiles, { max_hydrogen_count_digits => 2 } );

        for my $moiety (@smiles) {
            my @orbits = orbits( $moiety, \&write_SMILES );
            my %orbits;
            for my $orbit (0..$#orbits) {
                for (@{$orbits[$orbit]}) {
                    $orbits{$_} = $orbit;
                }
            }

            my @removed = clean_chiral_centers( $moiety,
                                                sub { $orbits{$_[0]} } );
            next unless @removed;
            warn scalar @removed . ' tetrahedral chiral center(s) with ' .
                 'less than 4 distinct neighbours were removed.' . "\n";
        }
    };
    if( $@ ) {
        $@ =~ s/\.?\n$//;
        die "error parsing '$smiles': $@.\n";
    } else {
        return \@smiles;
    }
}

sub compare_moieties
{
    my ( $first_moiety, $second_moiety, $options ) = @_;
    my @messages;
    for my $option ( @$options ) {
        my $message = $option->{func}(
            $first_moiety, $second_moiety, $option->{'args'}
        );
        if( defined $message && $message ) {
            push @messages, $message;
        }
        if( defined $message &&
            ( ! defined $message->type ||
              ( defined $message->type && $message->type eq 'isomorphism' ) ) ) {
            last;
        }
    }
    return @messages;
}

sub cleanup_empty_moieties
{
    my $removed = 0;
    for (@_) {
        my $count = scalar @$_;
        @$_ = grep { scalar $_->vertices } @$_;
        $removed += $count - scalar @$_;
    }
    if( $removed ) {
        warn "$removed empty moiety(es) removed from both " .
             "SMILES prior to comparison.\n";
    }
}

sub remove_atoms
{
    my ( $moieties, $atoms ) = @_;

    my $changed = 0;
    my @moieties_now;
    for my $moiety (@$moieties) {
        my $maybe_split_moiety;
        for my $vertex ($moiety->vertices) {
            next if ! any { ucfirst $vertex->{symbol} eq $_ } @$atoms;
            $maybe_split_moiety |= $moiety->degree( $vertex ) >= 2;
            for my $neighbour ($moiety->neighbours( $vertex )) {
                next unless Chemistry::OpenSMILES::is_chiral_tetrahedral( $neighbour );
                # All neighbouring chiral tetrahedral atoms have to lose
                # their chirality status
                delete $neighbour->{chirality};
                delete $neighbour->{chirality_neighbours};
            }
            $moiety->delete_vertex( $vertex );
            $changed = 1;
        }

        if( $maybe_split_moiety ) {
            my @connected = $moiety->connected_components;
            if( @connected == 1 ) {
                push @moieties_now, $moiety;
            } else {
                # Split the graph (moiety) in question into graphs each
                # consisting of a connected component. Graph module does
                # not contain a function or method to do so, thus it has
                # to be done using Graph::connected_components()
                for my $i (0..$#connected) {
                    my $moiety_now = $moiety->copy;
                    for my $j (0..$#connected) {
                        next if $i == $j;
                        $moiety_now->delete_vertices( @{$connected[$j]} );
                    }
                    push @moieties_now, $moiety_now;
                }
            }
        } else {
            push @moieties_now, $moiety;
        }
    }

    @$moieties = @moieties_now;

    return $changed unless $changed;

    cleanup_empty_moieties( $moieties );
    return $changed;
}

sub remove_charge
{
    my ( $moieties ) = @_;

    my $changed = 0;
    for my $moiety (@$moieties) {
        for my $vertex ($moiety->vertices) {
            next if ! exists $vertex->{charge};
            delete $vertex->{charge};
            $changed = 1;
        }
    }

    return $changed;
}

sub remove_chirality
{
    my ( $moieties ) = @_;

    my $changed = 0;
    for my $moiety (@$moieties) {
        for my $vertex ($moiety->vertices) {
            next if ! exists $vertex->{chirality};
            delete $vertex->{chirality};
            $changed = 1;
        }
    }

    return $changed;
}

sub remove_aromaticity
{
    my ( $moieties ) = @_;

    my $changed = 0;
    for my $moiety (@$moieties) {
        for my $vertex ($moiety->vertices) {
            next if $vertex->{symbol} eq ucfirst $vertex->{symbol};
            $vertex->{symbol} = ucfirst $vertex->{symbol};
            $changed = 1;
        }
    }

    return $changed;
}

sub remove_bond_order
{
    my ( $moieties, $options ) = @_;

    my $message = 'order';
    my @orders;

    $options = {} unless $options;
    $message = $options->{message} if $options->{message};
    @orders = @{$options->{orders}} if $options->{orders};

    my $changed = 0;
    for my $moiety (@$moieties) {
        for my $edge ($moiety->edges) {
            next if !$moiety->has_edge_attributes( @$edge );
            if( @orders &&
                !grep { $moiety->get_edge_attribute( @$edge, 'bond' ) eq $_ }
                      @orders ) {
                next;
            }
            $moiety->delete_edge_attributes( @$edge );
            $changed = 1;
        }
    }

    return $changed;
}

sub remove_atom_types
{
    my ( $moieties ) = @_;

    for my $moiety (@$moieties) {
        for my $vertex ($moiety->vertices) {
            $vertex->{symbol} = 'X';
        }
    }

    return 1;
}

sub remove_pendant_vertices
{
    my ( $first_moiety, $second_moiety ) = @_;

    my $change = 0;
    my $is_single = @$first_moiety == 1 && @$second_moiety == 1;
    for my $moiety (@$first_moiety, @$second_moiety) {
        my @pendants = grep { $moiety->degree($_) == 1 }
                            $moiety->vertices;
        # At least two vertices must remain after the removal, otherwise
        # the removal is not very meaningful.
        next if !@pendants;
        next if $moiety->vertices - @pendants < 2;
        $moiety->delete_vertices(@pendants);
        $change = 1;
    }

    return unless $change;

    eval {
        pair_moieties( $first_moiety, $second_moiety );
    };
    if( $@ ) {
        return { type => 'error', text => "$@" };
    }

    if( !@$first_moiety && !@$second_moiety ) {
        if( $is_single && $compositions_are_equal ) {
            return message(
                { text => "possible simple structural isomers" }
            );
        } elsif( $is_single ) {
            return message(
                { text => "isomorphic simple graphs modulo pendant " .
                             "vertices, different compositions" }
            );
        } else {
            return message(
                { text => "isomorphic simple graphs modulo pendant vertices" }
            );
        }
    }

    return;
}

sub chemical_composition
{
    my( $graph ) = @_;
    my %composition;
    for ($graph->vertices) {
        $composition{ucfirst $_->{symbol}}++;
    }
    return join ' ', map { $_ . $composition{$_} } sort keys %composition;
}

sub pair_moieties
{
    my( $A, $B, $options ) = @_;

    $options = {} unless $options;

    my %A_depictions;
    my %A_quantities;
    for (@$A) {
        my $depiction = canonical_depiction( $_ );

        $A_quantities{$depiction}++;
        push @{$A_depictions{$depiction}}, $_;

        if( $options->{check_isomorphism} &&
            $A_quantities{$depiction} > 1 &&
            !are_isomorphic( unpack_molecular_graph( $A_depictions{$depiction}[-2] ),
                             unpack_molecular_graph( $A_depictions{$depiction}[-1] ),
                             \&depict_unpacked_vertex ) ) {
            warn "graphs for '$depiction' were found to be not isomorphic\n";
        }
    }

    my %B_depictions;
    my %B_quantities;
    for (@$B) {
        my $depiction = canonical_depiction( $_ );

        $B_quantities{$depiction}++;
        push @{$B_depictions{$depiction}}, $_;

        if( $options->{check_isomorphism} &&
            $B_quantities{$depiction} > 1 &&
            !are_isomorphic( unpack_molecular_graph( $B_depictions{$depiction}[-2] ),
                             unpack_molecular_graph( $B_depictions{$depiction}[-1] ),
                             \&depict_unpacked_vertex ) ) {
            warn "graphs for '$depiction' were found to be not isomorphic\n";
        }
    }

    my( $only_A, $only_B, $common ) =
        comm( [ keys %A_depictions ], [ keys %B_depictions ] );
    if( !@$only_A && !@$only_B ) {
        for (@$common) {
            if( $A_quantities{$_} != $B_quantities{$_} ) {
                warn "different number of moieties of '$_', " .
                     "$A_quantities{$_} vs. $B_quantities{$_}\n";
            }

            if( $options->{check_isomorphism} &&
                !are_isomorphic( unpack_molecular_graph( $A_depictions{$_}[0] ),
                                 unpack_molecular_graph( $B_depictions{$_}[0] ),
                                 \&depict_unpacked_vertex ) ) {
                warn "graphs for '$_' were found to be not isomorphic\n";
            }
        }
    }
    @$A = map { @{$A_depictions{$_}} } @$only_A;
    @$B = map { @{$B_depictions{$_}} } @$only_B;
}

sub canonical_depiction
{
    my( $graph, $color_sub ) = @_;

    $color_sub = \&write_SMILES unless $color_sub;

    my $drop_chirality = sub {
        my( $vertex ) = @_;

        return '' unless %$vertex;

        my %atom = %$vertex;
        delete $atom{chirality};
        return $color_sub->( \%atom );
    };

    my $order_sub = sub {
        return exists $_[0]->{number} ? $_[0]->{number} : -1;
    };

    # FIXME: This code is copied from smi_canonicalise. A more effective
    # solution should be found instead of duplicating the code.

    my $copy  = unpack_molecular_graph( $graph );
    my @order = canonical_order( $copy, $drop_chirality, $order_sub );
    my %order;
    for (0..$#order) {
        $order{$order[$_]} = $_;
    }

    # Drop cis/trans markers from the input graph and mark them
    # anew.
    for my $bond ($graph->edges) {
        next unless is_cis_trans_bond( $graph, @$bond );
        $graph->delete_edge_attribute( @$bond, 'bond' );
    }
    mark_all_double_bonds( $graph,
                           sub {
                                if( $copy->has_edge( $_[0], $_[3] ) &&
                                    $copy->has_edge_attribute( $_[0], $_[3], 'pseudo' ) ) {
                                    return $copy->get_edge_attribute( $_[0], $_[3], 'pseudo' );
                                }
                           },
                           sub { return $order{$_[0]} } );

    my $smiles = write_SMILES(
        $graph,
        sub {
            my @sorted = sort { $order{$a} <=> $order{$b} }
                              keys %{$_[0]};
            return $_[0]->{shift @sorted};
        } );

    # A.M.: I cannot find a counter-example, thus the following seems
    # reasonable to me. In a SMILES descriptor, one can substitute all
    # '/' with '\' and vice versa, and retain correct cis/trans settings.
    if( $smiles =~ /([\/\\])/ && $1 eq '\\' ) {
        $smiles =~ tr/\/\\/\\\//;
    }

    return $smiles;
}

sub comm
{
    my( $A, $B ) = @_;

    my @A = sort @$A;
    my @B = sort @$B;

    my( @only_A, @only_B, @common );
    while( @A && @B ) {
        if( $A[0] eq $B[0] ) {
            push @common, shift @A;
            shift @B;
            next;
        }

        if( $A[0] lt $B[0] ) {
            push @only_A, shift @A;
            next;
        }

        if( $A[0] gt $B[0] ) {
            push @only_B, shift @B;
            next;
        }
    }
    push @only_A, @A;
    push @only_B, @B;

    return \@only_A, \@only_B, \@common;
}

# "Unpacks" parsed SMILES graph by converting cis/trans bonds to
# pseudoedges and chirality markers to pseudovertices with their own
# pseudoedges.
sub unpack_molecular_graph
{
    my( $graph ) = @_;

    my $copy = $graph->copy;
    for my $bond ($graph->edges) {
        next unless $graph->has_edge_attribute( @$bond, 'bond' );
        $copy->set_edge_attribute( @$bond,
                                   'bond',
                                   $graph->get_edge_attribute( @$bond, 'bond' ) );
    }
    cis_trans_to_pseudoedges( $copy );
    chirality_to_pseudograph( $copy );

    return $copy;
}

sub depict_unpacked_vertex
{
    my( $vertex ) = @_;

    if( ref $vertex eq 'HASH' && exists $vertex->{symbol} ) {
        $vertex = { %$vertex };
        delete $vertex->{chirality};
        return write_SMILES( $vertex );
    }

    return Dumper $vertex;
}
