#!/usr/bin/perl -w

# Creates x.iacc and x.io from x.src
# usage:
#     aux/mkfiles compiler source destination
#     aux/mkfiles object source destination
# this needs to run with cwd == top-level CLC-INTERCAL-Base distribution;
# to generate objects it needs the blib/lib directory as it uses Interpreter
# which is generated

# This file is part of CLC-INTERCAL

# Copyright (c) 2008, 2023 Claudio Calvelli, all rights reserved.

# CLC-INTERCAL is copyrighted software. However, permission to use, modify,
# and distribute it is granted provided that the conditions set out in the
# licence agreement are met. See files README and COPYING in the distribution.

use strict;

use Cwd 'cwd';
use File::Spec::Functions qw(splitpath catfile);

# we do not want to depend on any generated modules when making compilers
require (catfile(cwd(), qw(INTERCAL Generate.pm)));
# Generate already requires Exporter by path so we better not do it too
import Language::INTERCAL::Exporter '1.-94.-2.4', qw(compare_version);

# copied from ByteCode.pm, ought to be from data
use constant NUM_OPCODES => 0x80;
use constant BYTE_SIZE     => 8;
use constant OPCODE_RANGE  => 1 << BYTE_SIZE;
use constant BYTE_SHIFT    => OPCODE_RANGE - NUM_OPCODES;

use vars qw($VERSION $PERVERSION);
($VERSION) = ($PERVERSION = "CLC-INTERCAL/Base aux/mkfiles 1.-94.-2.4") =~ /\s(\S+)$/;

sub BC ($);

my $verbose = 1;
if (@ARGV && $ARGV[0] eq '--quiet') {
    $verbose = 0;
    shift @ARGV;
}

@ARGV == 3 or die "Usage: mkfiles compiler|object SOURCE DESTINATION\n";
my ($mode, $sourcefile, $destfile) = @ARGV;
$mode = lc($mode);
$mode eq 'compiler' || $mode eq 'object'
    or die "Invalid mode: $mode\n";

{
    no warnings;
    $Language::INTERCAL::Generate::verbose = 0;
}

my @getsplats = (
    '@@DATA Splats@@',
    '@@ALL SPLATS NUMBER@@ @@NAME@@',
);

my %splatnumber;
for my $sn (Language::INTERCAL::Generate::Convert(@getsplats)) {
    $sn =~ /^\s*(\d+)\s+(\S+)\s*$/ and $splatnumber{$2} = $1;
}

my @getbytecode = (
    '@@DATA ByteCode@@',
    '@@ALL OPCODES NUMBER@@ @@NAME@@',
);

my %bc_right = ();
my %bc_number = ();
for my $bc (Language::INTERCAL::Generate::Convert(@getbytecode)) {
    $bc =~ /^\s*(\d+)\s+(\S+)\s*$/ or next;
    my ($number, $name) = ($1, $2);
    $bc_number{$name} = $number;
    $bc_right{$name} = pack('C*', BC(4), BC(1), $number);
}

my @getregisters = (
    '@@DATA ByteCode@@',
    '@@ALL DOUBLE_OH_SEVEN NAME@@ DOS % @@NUMBER@@',
    '@@ALL WHIRLPOOL NAME@@ WHP @ @@NUMBER@@',
    '@@ALL SHARK_FIN NAME@@ SHF ^ @@NUMBER@@',
);

my %reg_obj = ();
my %reg_right = ();
my %reg_re = ();
for my $rp (Language::INTERCAL::Generate::Convert(@getregisters)) {
    $rp =~ /^\s*(\S+)\s+(\S+)\s+(\S)\s+(\d+)\s*$/ or next;
    my ($reg, $code, $prefix, $number) = ($1, $2, $3, $4);
    my @code = ($bc_number{$code}, BC($number));
    my $name = $prefix . $reg;
    $reg_obj{$name} = pack('C*', @code);
    $reg_right{$name} = pack('C*', BC(4), BC(scalar @code), @code);
    push @{$reg_re{$prefix}}, $reg;
}
my $registers = join('|', map { quotemeta($_) . '\s*(?:' . join('|', @{$reg_re{$_}}) . ')' } keys %reg_re);
my $is_assignment = qr/^($registers)\s*<\s*-\s*/;

my $exit = 0;
open(AUX, '<', $sourcefile) or die "$sourcefile: $!\n";
my $sourcename = (splitpath($sourcefile))[2];
my $destname = (splitpath($destfile))[2];
my $source = '';
my $start = 0;
my @code = ();
my %flags = ();
my $dv;
{
    # XXX we ought to process the file twice, once to see what data
    # XXX it uses, and once to replace the version
    (my $name = $sourcename) =~ s/\.\w+$//;
    if ($name eq 'asm' || $name eq 'iacc' || $name eq 'sick') {
	my @dv = ('@@DATA ByteCode@@', '@@VERSION@@');
	($dv) = Language::INTERCAL::Generate::Convert(@dv);
	if ($name eq 'sick' || $name eq 'iacc') {
	    my @sv = ('@@DATA Splats@@', '@@VERSION@@');
	    my ($sv) = Language::INTERCAL::Generate::Convert(@sv);
	    compare_version($dv, $sv) < 0 and $dv = $sv;
	}
    }
}
LINE: while (<AUX>) {
    chomp;
    s/\xc2([\x80-\xbf])/$1/g;
    if (s#\\\s*$##) {
	$_ .= <AUX>;
	redo LINE;
    }
    next if /^\s*$/ || /^\s*#/;
    my $orig = $_;
    if (s/^!//) {
	if (m#\baux/\Q$sourcename\E\s*(\S+)\b#) {
	    my $sv = $1;
	    defined $dv and compare_version($sv, $dv) < 0 and $sv = $dv;
	    s#\baux/\Q$sourcename\E(\s*)\S+\b#INTERCAL/Include/$destname$1$sv#g;
	}
	$source .= $_ . "\n";
	push @code, sts($start, length($source) - $start) .
		    pack('C*', $bc_number{NOT});
	$start = length $source;
	next LINE;
    }
    if (s/\?(\S+)\s*<\s*-\s*\?(\S+)//i) {
	my $flag = $1;
	my $value = $2;
	$source .= fold("DO ?$flag <- ?$value");
	$flags{$flag} = $value;
	if (/\S/) {
	    print "??? $_\n";
	    $exit = 1;
	}
	push @code, sts($start, length($source) - $start) .
		    pack('C*', $bc_number{NOT});
	$start = length $source;
	next;
    }
    if (s/$is_assignment//) {
	my $reg = $1;
	my ($expr, $code) = extract_expression();
	if ($expr ne '') {
	    $source .= fold("DO $reg <- $expr");
	    push @code, sts($start, length($source) - $start) .
			chr($bc_number{STO}) . $code . $reg_obj{$reg};
	    if (/\S/) {
		print "??? $_\n";
		$exit = 1;
	    }
	} else {
	    print "??? $orig\n";
	    $exit = 1;
	}
	$start = length $source;
	next;
    }
    if (s/^(\S+)\s+//) {{
	my $sname = $1;
	my $symbol = $sname =~ /^\w+$/ ? "?$sname" : const('?', $sname);
	my $stmt = "DO CREATE _2 $symbol";
	my %left = ();
	my ($left, @left) = extract_left(\%left);
	last if $left eq '';
	$stmt .= $left;
	my $code = pack('C*', $bc_number{CRE}, BC(2), $bc_number{STR}, BC(length $sname)) .
		   $sname .
		   pack('C*', BC(scalar @left)) .
		   join('', @left);
	if (s/^:\s*//) {
	    my ($right, @right) = extract_right(\%left);
	    last if $right eq '';
	    $stmt .= " AS " . $right;
	    $code .= pack('C*', BC(scalar @right)) . join('', @right);
	}
	$source .= fold($stmt);
	$code = sts($start, length($source) - $start) . $code;
	push @code, $code;
	$start = length $source;
	next LINE unless /\S/;
	print "??? $_\n";
	$exit = 1;
	next LINE;
    }}
    print "??? $orig\n";
    $exit = 1;
}
$source .= "\n";
$source .= fold('DO GIVE UP');
push @code, sts($start, length($source) - $start) . chr($bc_number{GUP});
close AUX;
die "Error in $sourcename\n" if $exit;
if ($mode eq 'compiler') {
    # need to make sure not to update the file if there are no changes
    if (open(SRC, '<', $destfile)) {
	local $/ = undef;
	my $prev = <SRC>;
	close SRC;
	if ($prev eq $source) {
	    $verbose and print "No changes to $destfile\n";
	    exit 0;
	}
    }
    open(SRC, '>', $destfile) or die "$destfile: $!\n";
    print SRC $source or die "$destfile: $!\n";
    close SRC or die "$destfile: $!\n";
} else {
    # we've already loaded Exporter... but Interpreter doesn't know
    $INC{'Language/INTERCAL/Exporter.pm'} = $INC{catfile(cwd(), qw(INTERCAL Exporter.pm))};
    require Language::INTERCAL::Interpreter;
    import Language::INTERCAL::Interpreter '1.-94.-2.4';
    my $int = new Language::INTERCAL::Interpreter();
    $int->object->setbug(0, 0);
    $int->object->clear_code;
    $int->object->unit_code(0, $source, length($source), \@code);
    for my $f (keys %flags) {
	$int->object->add_flag($f, $flags{$f});
    }
    my $fh = new Language::INTERCAL::GenericIO('FILE', 'r', $destfile);
    $int->read($fh, 0);
}

sub extract_expression {
    return ("#$1", pack('C*', BC($1))) if s/^(\d+)\s*//;
    if (s/^\?(\w+)\s*//) {
	return ("?$1", pack('C*', $bc_number{STR}, BC(length $1)) . $1);
    }
    if (s/^\?(\S+)\s*//) {
	return (",?" . join(' + ', map {"#$_"} unpack('C*', $1)) . ",",
		pack('C*', $bc_number{STR}, BC(length $1)) . $1);
    }
    ('', '');
}

sub const {
    my ($prefix, $data) = @_;
    return ",$data," if $prefix eq '' && $data =~ /^\w+$/;
    return ",$prefix" . join(' + ', map {"#$_"} unpack("C*", $data)) . ",";
}

sub extract_left {
    my ($left) = @_;
    s/^\s+//;
    s/^:/"" :/;
    my $ret = '';
    my @ret = ();
    while (/^./) {
	last if /^[:<]/;
	my ($lp, $lc) = left_production($left);
	return ('') if $lp eq '';
	$ret .= ' ' . $lp;
	my $count = 0;
	if (s/^=\s*(\d+)\s*//) {
	    $ret .= "=$1";
	    $count = $1;
	} elsif (s/^=\s*\*\s*//) {
	    $ret .= '=*';
	    $count = 65535;
	}
	push @ret, pack('C*', BC($count)) . $lc;
    }
    ($ret, @ret);
}

sub left_production {
    my ($left) = @_;
    if (s/^"([^"]*)"\s*// || s/^'([^']*)'\s*//) {
	my $string =  $1;
	return (const('', $string),
		pack('C*', BC(1), $bc_number{STR}, BC(length $string)) . $string);
    }
    if (s/^(\w[^\s=]*)\s*//) {
	my $symbol = $1;
	$left->{$symbol}++;
	my $code = pack('C*', BC(0), $bc_number{STR}, BC(length $symbol)) . $symbol;
	return ("?$symbol", $code) if $symbol =~ /^\w+$/;
	return (const('?', $symbol), $code);
    }
    ('', '');
}

sub extract_right {
    my ($left) = @_;
    s/^\s+//;
    return (",,", pack('C*', BC(4), BC(0))) if /^$/;
    my $ret = '';
    my @ret = ();
    while (/^./) {
	last if s/^:\s*//;
	my ($rp, $rc) = right_production($left);
	return ('') if $rp eq '';
	$ret .= ' + ' if $ret ne '';
	$ret .= $rp;
	push @ret, $rc;
    }
    ($ret, @ret);
}

sub right_production {
    my ($left) = @_;
    return right_decode($1) if s/^"([^"]*)"\s*//;
    return right_decode($1) if s/^'([^']*)'\s*//;
    if (s/^(\d+)\s*//) {
	my @code = BC($1);
	return ("#$1", pack('C*', BC(4), BC(scalar @code), @code));
    }
    if (s/^\*(\w+)\s*//) {
	my $splat = $1;
	exists $splatnumber{$splat} or return ('', '');
	$splat = $splatnumber{$splat};
	my @scode = BC($splat);
	return ("#$splat", pack('C*', BC(4), BC(scalar @scode), @scode));
    }
    return ("*", pack('C*', BC(15))) if s/^\*\s+// || s/^\*$//;
    if (s/^(\!?)(\w\S*)\s*//) {
	my $bang = $1;
	my $symbol = $2;
	my $number = 1;
	if ($symbol =~ s/=(\d+)$//) {
	    $number = $1;
	} elsif (exists $left->{$symbol} && $left->{$symbol} > 1) {
	    print "$.: warning: assuming $symbol=1\n";
	}
	return ('', '') if ! exists $left->{$symbol} || $left->{$symbol} < $number;
	my $code = pack('C*', BC($bang eq '' ? 0 : 6), BC($number),
			      $bc_number{STR}, BC(length $symbol)). $symbol;
	if ($symbol =~ /^\w+$/) {
	    return ("?$symbol #$number", $code) if $bang eq '';
	    return ("$bang$symbol #$number", $code);
	}
	return (const($bang eq '' ? '?' : $bang, $symbol) . " #$number", $code);
    }
    ('', '');
}

sub right_decode {
    my ($n) = @_;
    return ($n, $reg_right{$n}) if exists $reg_obj{$n};
    return ($n, $bc_right{$n}) if exists $bc_right{$n};
    ('', '');
}

sub fold {
    my ($text) = @_;
    my $res = "\t";
    $text =~ s/^\s*//;
    my $indent = '';
    $indent = ' ' x length($1) if $text =~ /^(\S+\s*)/;
    my $len = 0;
    while ($text =~ s/^(\S+)\s*//) {
	my $add = ($len ? ' ' : '') . $1;
	if (length($add) + $len >= 64) {
	    $res .= "\n\t" if $len;
	    $len = 0;
	    $add =~ s/^\s*/$indent/;
	}
	$res .= $add;
	$len += length($add);
    }
    $text . $res . "\n";
}

sub sts {
    my ($before, $after) = @_;
    pack('C*', $bc_number{STS}, BC($before), BC($after), BC(0), BC(0));
}

sub BC ($) {
    my ($val) = @_;
    $val < BYTE_SHIFT
        and return ($val + NUM_OPCODES);
    $val < OPCODE_RANGE
        and return ($bc_number{HSN}, $val);
    my $div = int($val / OPCODE_RANGE);
    $div < OPCODE_RANGE
        and return ($bc_number{OSN}, $div, $val % OPCODE_RANGE);
    die "Internal error: BC: value does not fit in 1 spot\n";
}

