#! /usr/bin/perl
use warnings;
use strict;
use integer;
use FindBin;
use lib $FindBin::RealBin;
use Def;

# This helper script ensures that ram titles are consistent and correct
# throughout debram.txt, that the titles are indented and ordered
# correctly, and that the cross-references are sane.
#
#     usage: check-ram-titles [-ch] { debram.txt }
#
# If no debram.txt filename is supplied, the script examines the main
# debram.txt file in the expected place relative to the script.  The -c
# option yields a ram count.
#
#

our $usage = <<END;
usage: $0 [-csh] { debram.txt }
    -c count rams
    -s run on cmdsel.txt, instead
    -h print this usage message
END

my $linepat;
{
  my $p1 = qr/\d{${Def::ndig}}/o;
  my $p2 = qr/\S(?:.*?\S)??/o;
  $linepat = qr/^($p1) ( *)($p2)(?: (-+>) ($p2))?\s*$/o;
}
my $shortbar_pat;
my $longbar_pat ;
{
  my $w1 = $Def::width_shortbar    ;
  my $w2 = $Def::width_shortbar + 1;
  $shortbar_pat = qr /^-{$w1}$/o;
  $longbar_pat  = qr /^-{$w2}/o ;
}

# Read command-line arguments and options.
my @opt;
my @arg;
push @{ /^-\S/ ? \@opt : \@arg }, $_ for @ARGV;
my %opt = map {
  my $o = $_;
  map { substr( $o, $_, 1 ) => 1 } 1 .. length($o)-1
} @opt;
if ( $opt{'?'} || $opt{h} || @arg > 1 ) {
  print $usage;
  exit 0;
}
my $debram;
$debram = shift @arg if @arg;
$debram = $opt{s} ? $Def::cmdsel_txt : $Def::debram_txt
  unless defined $debram;

my @r1; # head titles
my @r2; # body titles
my @r3; # tail titles

my $pat_count = $opt{s} ? qr// : qr/ \(\d+\)/;

sub ramlvl ($) { $Def::ndig - @{ [ $_[0] =~ /0/g ] } }

# Parse a title line from head or tail.
sub parse ($@) {

  # Are these main-body title lines?
  my $isbody = shift;

  # Divide each line into fields.
  my @line;
  for ( @_ ) {
    my( $ram, $sp, $title, $xrefs, $ramlvl, $uctitle, @xref );
    if ( $isbody ) {
      ( $ram, $title ) =
        /^(\d{${Def::ndig}}) (\S(?:.*?\S)??)${pat_count}\s*$/o
        or die
        "$0: parse: badly formed title line in the main body\n$_\n";
    }
    else {
      my $arrow;
      ( $ram, $sp, $title, $arrow, $xrefs ) = /$linepat/o
        or die "$0: parse: badly formed ToC/xref line\n$_\n";
      if ( defined $arrow ) {
        length($ram) + length($sp) + length($title)
          + length($arrow) + 2 == $Def::pos_arrowhead
          or die "$0: parse: arrow too short or too long\n$_\n";
      }
      defined( $xrefs ) or $xrefs = '';
      @xref = split ' ', $xrefs;
    }
    $ramlvl = ramlvl $ram;
    length( $title ) <= $Def::maxlen_title
      or die "$0: parse: title too long\n$_\n";
    $uctitle = uc $title;
    if ( $isbody ) {
      $uctitle eq $title
        or die "$0: parse: lower-case char in title\n$_\n";
    }
    else {
      my $ramlvl1 = $ramlvl + ( $opt{s} && $ramlvl > 1 ? -1 : 0 );
      length( $sp ) == $Def::ind_shift * ($ramlvl1-1)
        or die "$0: parse: bad indent in\n$_\n";
    }
    push @line, {
      isbody  => $isbody,
      ram     => $ram,
      ramlvl  => $ramlvl,
      title   => $title,
      uctitle => $uctitle,
      xref    => \@xref,
    };
  }

  # Mark each line as endram or parent ram.  Check ram ordering.
  for my $i ( 0 .. $#line-1 ) {
    $line[$i+1]{ram} gt $line[$i]{ram}
      or die "$0: parse: misordered rams near $line[$i]{ram}\n";
    $line[$i]{endram} = $isbody
      || $line[$i+1]{ramlvl} <= $line[$i]{ramlvl};
  }
  $line[-1]{endram} = 1 if @line;

  # Hash the rams.
  my %ram = map { $_->{ram} => $_ } @line;

  # Guard against stray or unsorted cross-references.
  for my $ram ( keys %ram ) {
    my $ram1 = $ram{$ram};
    my $parent = $ram;
    $parent =~ s/^(.*)[^0]/${1}0/;
    my $ram_prev;
    my $n_match_prev = $Def::ndig;
    for my $xref ( @{ $ram1->{xref} } ) {
      $ram{$xref}
        or die "$0: parse: xref to unknown ram $xref\n";
      $xref eq $parent
        and die "$0: parse: xref to ${ram}'s immediate parent\n";
      my $errmsg
        = "$0: parse: misordered xrefs from $ram\n";
      my $j = 0; # count of matching digits.
      ++$j while $j < $Def::ndig
        && substr( $ram, $j, 1 ) eq substr( $xref, $j, 1 );
      $j < $Def::ndig && $j <= $n_match_prev or die $errmsg;
      if ( $j < $n_match_prev ) {
        $n_match_prev = $j;
        $ram_prev     = $xref;
        next;
      }
      defined( $ram_prev ) && $ram_prev ge $xref and die $errmsg;
      $ram_prev = $xref;
    }
  }

  return %ram;

}

# Read the title lines in from each of debram.txt's three sections:
# head; body; tail.
open F, '<', $debram;
1 while !eof(F) && <F> ne (
  $opt{s} ? $Def::mark_commands : $Def::mark_toc
);
unless ( $opt{s} ) { <F>; <F>; }
{
  my $wasblank = 0;
  while ( !eof(F) ) {
    local $_ = <F>;
    if ( my( $ram ) = /^(\d{${Def::ndig}})/o ) {
      push @r1, $_;
      if ( $opt{s} ) {
        my $ramlvl = ramlvl $ram;
        ( $ramlvl <= 1 && $wasblank == 1 ) ||
        ( $ramlvl >  1 && $wasblank == 0 )
          or die
          "$0: blank line is wrong above ram $ram in the Contents\n";
        $wasblank = 0;
      }
    }
    elsif ( $opt{s} ) {
      if    ( !/\S/                   ) { ++$wasblank   }
      elsif (  /^\*{${Def::ndig}} \S/ ) { $wasblank = 0 }
      else                              { last          }
      $wasblank <= 1
         or die "$0: the Contents have a double blank line\n";
    }
    else { last }
  }
}
1 while !eof(F) && <F> ne (
  $opt{s} ? $Def::mark_commands : $Def::mark_main_body
);
{
  my $intitle = 0;
  while ( !eof(F) ) {
    local $_ = <F>;
    $intitle = !$intitle, next if /$longbar_pat/o;
    next unless $intitle;
    if    (    /^\d{${Def::ndig}}/o ) { push @r2, $_ }
    elsif ( !( $opt{s} && /\S/    ) ) { last         }
  }
}
unless ( $opt{s} ) {
  for ( 1, 2 ) { 1 while !eof(F) && <F> =~ /\S/ }
  {
    while ( !eof(F) ) {
      local $_ = <F>;
      if ( /^\d{${Def::ndig}}/o ) { push @r3, $_ }
      else { last }
    }
  }
}
!eof(F) or die "$0: the file is missing some parts or is not even a "
  . ( $opt{s} ? 'cmdsel.txt' : 'debram.txt' ) . "\n";
close F;

# Parse the three sections:
my %r1 = parse 0, @r1;
my %r2 = parse 1, @r2;
my %r3 = parse 0, @r3;

# Ensure that the rams in each section are the same, with the same
# titles, and that no title is duplicated.
for my $r (
  $opt{s}
  ? ( [ \%r1 ] )
  : ( [ \%r1, \%r3 ], [ \%r3, \%r1 ] )
) {
  my( $ra, $rb ) = @$r;
  for my $ram ( sort keys %$ra ) {
    my $ram1    = $ra->{$ram};
    my $title   = $ram1->{title};
    my $uctitle = $ram1->{uctitle};
    my $endram  = $ram1->{endram};
    (
      !defined($rb) ||
      ( $rb->{$ram} && $title eq $rb->{$ram}{title} )
    ) && (
      !$endram ||
      ( $r2{$ram} && $uctitle eq $r2{$ram}{title} )
    ) or die "$0: title mismatch, ram $ram\n";
  }
}
for my $ram ( sort keys %r2 ) {
  my $ram1    = $r2{$ram};
  my $title   = $ram1->{title};
  (     $r1{$ram} && $r1{$ram}{endram} && $title eq $r1{$ram}{uctitle} )
    && (
      $opt{s} ||
      ( $r3{$ram} && $r3{$ram}{endram} && $title eq $r3{$ram}{uctitle} )
    )
    or die "$0: title mismatch, ram $ram\n";
}
{
  my %uctitle = ();
  for my $ram ( keys %r1 ) {
    my $uctitle = $r1{$ram}{uctitle};
    exists $uctitle{$uctitle}
      and die "$0: duplicate title\n$uctitle\n";
    $uctitle{$uctitle} = $ram;
  }
}

# Check that no ram lacks a parent.
for my $ram ( keys %r1 ) {
  my $parent = $ram;
  $parent =~ s/^(0*[1-9]\d*)([1-9])(0*)/${1}0${3}/;
  $r1{$parent} or die "$0: ram $ram lacks a parent $parent\n";
}

# Print ram counts if -c.
if ( $opt{c} ) {
  my $n_end    = 0;
  my $n_parent = 0;
  ++( $_->{endram} ? $n_end : $n_parent ) for values %r1;
  printf
    "%${Def::ndig_ram_count}d end rams\n"    .
    "%${Def::ndig_ram_count}d parent rams\n" .
    "%${Def::ndig_ram_count}d total rams\n",
    $n_end, $n_parent, $n_end + $n_parent;
}

