#!/ms/dist/perl5/bin/perl5.8

use strict;

BEGIN { do 'tests.pl'; }

# Row background colours.  BG1/BG2 are the two alternating row colours;
# BGDARK1/BGDARK2 are the colours used for the highlighted "steps" cell.
use constant {
  BG1     => '#9999ff',
  BG2     => '#ffffff',
  BGDARK1 => '#cc99cc',
  BGDARK2 => '#ff9999',
};

# Lookup tables keyed by the current row colour.  These are declared in a
# second statement because they need BG1/BG2 to already exist.
#   BG     - maps a row colour to the colour of the next row.
#   BGDARK - maps a row colour to the highlight colour for that row.
use constant {
  BG     => { BG1() => BG2(),     BG2() => BG1()     },
  BGDARK => { BG1() => BGDARK1(), BG2() => BGDARK2() },
};

# Print one HTML results table to the currently selected filehandle.
#
#   $dir   - directory holding one result file per test, named
#            "<word1>_<word2>".  When it equals GOODDIR every test gets
#            two cells (steps and time); otherwise one cell (time only).
#   $tests - array ref of [ $word1, $word2 ] pairs, one per table column.
#
# Each result file line has the form "program|ok|steps|time".  A false
# "ok" field is shown as "X" in the steps cell, and a time of "**" is
# printed verbatim instead of being formatted as a number.
#
# Results are stored by test index rather than appended, so a program
# that is missing from one result file (or a result file that cannot be
# opened) yields blank cells in that column instead of shifting the
# program's remaining results under the wrong headers.  If a program
# appears more than once in the same file, the last line wins.
sub report {
  my ($dir, $tests) = @_;
  my $colspan = ($dir eq GOODDIR ? 2 : 1);

  my @headers = ();
  my %results = ();   # program => [ [ $steps, $time ], ... ], indexed by test

  for my $column (0 .. $#$tests) {
    my ($word1, $word2) = @{ $tests->[$column] };

    push @headers, "$word1<br>$word2";

    my $file = "$dir/${word1}_$word2";
    my $in;
    unless (open $in, "<", $file) {
      # Keep going so the rest of the report is still produced; the
      # column for this test is rendered with blank cells.
      warn "report: cannot open $file: $!\n";
      next;
    }

    while (my $line = <$in>) {
      chomp $line;
      my ($program, $ok, $steps, $time) = split /\|/, $line;

      # Ignore blank or malformed lines that carry no program name;
      # they would otherwise create a row with an empty label.
      next unless defined $program && length $program;

      $steps = "X" unless $ok;
      $results{$program}[$column] = [ $steps, $time ];
    }
    close $in;
  }

  print "<table border=1>\n";
  print "<tr><td>&nbsp;</td>";
  print "<td align=center colspan=$colspan><b>$_</b></td>" for @headers;
  print "</tr>\n";

  my $bg = BG2;

  for my $program (sort keys %results) {
    print "<tr bgcolor=$bg><td>$program</td>";

    # Highlight colour for the steps cell depends only on the row colour.
    my $bgdark = BGDARK->{$bg};

    for my $column (0 .. $#headers) {
      my $cell = $results{$program}[$column];

      # Placeholder used when this program has no result for this test.
      my ($steps, $time) = ("&nbsp;", "&nbsp;");

      if ($cell) {
        ($steps, $time) = @$cell;
        $time = sprintf ("%0.03f", $time)
          unless ($time eq "**");
      }

      print "<td align=right bgcolor=$bgdark>$steps</td>" if ($colspan == 2);
      print "<td align=right>$time</td>";
    }

    print "</tr>\n";

    # Alternate the row colour for the next program.
    $bg = BG->{$bg};
  }

  print "</table>\n";
}

# Write the complete report to report.html.  The file handle is made the
# default output handle so that report() and the here-documents below can
# use plain print.  Open and close are both checked: a failed open would
# otherwise send every print to a closed handle, and buffered write
# errors only surface when the handle is closed.
my $report_file = "report.html";
open my $out, ">", $report_file
  or die "Cannot open $report_file for writing: $!\n";
my $previous_handle = select $out;

print <<HTML;
All tests were executed on a dual-processor Xeon 3 GHz machine with 6
GB of RAM. Perl programs were run with 5.8.4, and Python programs were
run with 2.4a2. Programs that use Inline were run at least once before
the test and their libraries cached, so the test time does not include
compilation. The "Web2" dictionary was used for all tests. <p>

The following submissions were not tested: <p>

<ul>
  <li> greg_bacon.scheme - I don't have a Scheme interpreter
  <li> ingo_blechschmidt.pir - I don't have parrot
  <li> david_b.pl - Blows all of my machine's RAM on "love hate"
</ul><p>

<i>Ron Isaacson, 2004/09/01</i><p>

<hr><p>

The following tests are expected to fail: <p>

<ul>
  <li> There is no ladder for "love 10th" or "worship justice".
  <li> "zzzz" is not in the dictionary, and "love zesty" are different
       lengths.
</ul><p>

The table shows the amount of time, in seconds, it took for the
program to complete (or '**' if it took more than 10 minutes and was
aborted). <p>

HTML

# Table of tests that are expected to fail (time column only).
report (BADDIR, BAD);

print <<HTML;
<p><hr><p>

The following tests are expected to succeed. <p>

<ul>
  <li> "love love" tests a degenerate case that not everybody accounted
       for.
  <li> "ruby code" succeeds only for case-insensitive searches.
  <li> "alcae" doesn't appear in the dictionary, but "Alcae" does;
       "alcae zesty" succeeds only if the case-insensitivity applies
       to the start/stop words as well.
</ul><p>

For each program, the table shows two things: the number of steps in
the ladder (or 'X' if the program failed); and the amount of time, in
seconds, the program took to complete (or '**' if it took more than 10
minutes and was aborted). <p>

HTML

# Table of tests that are expected to succeed (steps and time columns).
report (GOODDIR, GOOD);

# Restore the previous default handle before closing, then make sure the
# buffered output actually reached the file.
select $previous_handle;
close $out
  or die "Cannot close $report_file: $!\n";
