#!/local/perl586/bin/perl -w
my $automcdir = "/export/home/svn-trunk/masses/rule-qa/automc";

# need this to ensure that 'svn log' will include ALL changes
my $svn_checkins_root = "http://svn.apache.org/repos/asf/spamassassin/";

# we won't provide who-checked-in and commit-message details for changes
# older than this.  Note, this is not in rev number terms; it's an
# absolute count of revisions.
my $svn_log_limit = 500;

my $full_rebuild = 0;
if ($ARGV[0] && $ARGV[0] =~ /^-f/) {
  $full_rebuild = 1;
}

use XML::Simple;

use strict;
use bytes;
use POSIX qw(strftime);

my $myperl = $^X;

open (CF, "<$automcdir/config");
my %conf; while(<CF>) { /^(\S+)=(\S+)/ and $conf{$1} = $2; }
close CF;

die "no conf{html}: $conf{html}" unless (-d $conf{html});

my $svn_log;

# all known date/revision combos.  warning: could get slow in future
my @daterevs = get_all_daterevs();

foreach my $dr (@daterevs) {
  my $drdir = get_datadir_for_daterev($dr);

  # this one is always rebuilt.
  # print "$drdir/fastinfo.xml: creating...\n";
  my $fastinfo = get_fastinfo($dr, $drdir);
  if (!defined $fastinfo) { next; }

# always rewrite
  {
    open (OUT, ">$drdir/fastinfo.xml") or die "cannot write $drdir/fastinfo.xml";
    print OUT $fastinfo;
    close OUT or die "failed to write to $drdir/fastinfo.xml";
    chmod 0666, "$drdir/fastinfo.xml"; # or warn "failed to chmod $drdir/fastinfo.xml";
  }

  # this one is only built if it doesn't already exist, because
  # it's quite expensive to build
  if (!$full_rebuild && -f "$drdir/info.xml" && -s _) {
    # print "$drdir/info.xml: good\n";
    next;
  }

  if (!-e "$drdir/info.xml" || -s "$drdir/info.xml" == 0) {
    print "$drdir/info.xml: creating...\n";
    my $info = get_info($dr, $drdir);
    open (OUT, ">$drdir/info.xml") or die "cannot write $drdir/info.xml";
    print OUT $info;
    close OUT or die "failed to write to $drdir/info.xml";
    chmod 0666, "$drdir/info.xml"; # or warn "failed to chmod $drdir/info.xml";
  }
}

sub get_info {
  my ($dr, $drdir) = @_;

  $dr =~ /^(\d+)[-\/]r(\d+)-(\S+)$/;
  my $date = $1;
  my $rev = $2;
  my $tag = $3;

  my $info = {
  };

  if (!$svn_log) {
    get_svn_log();
  }

  foreach my $logentry (@{$svn_log->{logentry}}) {
    next if ($logentry->{revision} > $rev);
    $info->{msg} = $logentry->{msg} || '';
    $info->{checkin_rev} = $logentry->{revision} || '';
    $info->{checkin_date} = $logentry->{date} || '';   # '2005-10-31T04:20:10.686485Z'
    $info->{author} = $logentry->{author} || '';
    last;
  }

  return XMLout($info);
}

sub get_fastinfo {
  my ($dr, $drdir) = @_;

  $dr =~ /^(\d+)[-\/]r(\d+)-(\S+)$/;
  my $date = $1;
  my $rev = $2;
  my $tag = $3;

  if (!defined $tag) {
    warn "dir with no tag! ignored: $dr\n";
    return;
  }

  my $info = {
    date => $date,
    rev => $rev,
    tag => $tag,
    includes_net => 0
  };

  if (-f "$drdir/NET.all") {
    $info->{includes_net} = 1;
  }

  $info->{mclogmds} = '';
  $info->{submitters} = '';
  if (-f "$drdir/DETAILS.all") {
    read_submitters("$drdir/DETAILS.all", $info);
  }
  elsif (-f "$drdir/NET.all") {
    # this can happen when a weekly mass-check shares a rev with a previous or
    # later night's nightly m-c; all the non-weekly data winds up under another
    # date.
    read_submitters("$drdir/NET.all", $info);
  }

  return XMLout($info);
}

sub read_submitters {
  my ($fname, $info) = @_;

  open (IN, "<$fname") or warn "cannot read $fname";
  while (<IN>) {
    # spam results used for 329933 DETAILS all: spam-mc-fast.log spam-foo.log
    if (/^# spam results used for .+?: (.*)$/) {
      my $sub = $1;
      $sub =~ s/\bspam-/ /gs;
      $sub =~ s/\.log\b/ /gs;
      $sub =~ s/\s+/ /gs;
      $sub =~ s/^ //gs;
      $sub =~ s/ $//gs;
      $info->{submitters} = $sub;
    }
    if (/^#\s*(<mclogmds>.*<\/mclogmds>)/) {
      $info->{mclogmds} = XMLin($1); # incorporate raw XML
    }
    last if (!/^#/);        # don't want anything after the comments end
  }
  close IN;
}

sub get_all_daterevs {
  return sort map {
      s/^.*\/(\d+)\/(r\d\S+)$/$1-$2/; $_;
    } grep { /\/(\d+\/r\d\S+)$/ && -d $_ } (<$conf{html}/2*/r*>);
}

sub get_datadir_for_daterev {
  my $npath = shift;
  $npath =~ s/-/\//;
  return $conf{html}."/".$npath."/";
}

sub get_svn_log {
  print "getting svn log... (".time.")\n";

  if (open (IN, "svn log --limit $svn_log_limit --xml $svn_checkins_root |"))
  {
    eval {
      my $xml = join('', <IN>);
      $svn_log = XMLin($xml);
    };
    if ($@) {
      die "svn xml: $@";
    }
    close IN or die "svn failed: $!";
  }
  if (!$svn_log) {
    die "no svn log --xml";
  }
  print "got ".(scalar @{$svn_log->{logentry}})." log entries (".time.")\n";

  # use Data::Dumper; print Dumper($svn_log); die;
}

