#!/usr/bin/perl -w
#
# Convert HTML on standard input to multiple text files containing MCONV
# attributes on standard output. No attempt is made to do alignment to
# centre, right, or fully justified - all text is left aligned.
#
# This is just about good enough to convert the texi2html'd manual,
# post-munging (by munge.pl) into an MCONV menu for easy reference. Other
# than that, it's a hacky mess.
#
# $Id: html2mconv.pl,v 1.4 2002/01/30 22:32:13 ivarch Exp $

use HTML::Entities ();

my $outdir = $ARGV[0];

my $data = join ('', <STDIN>);
$data =~ s/^.*?(<H1)/$1/is;
$data =~ s/<BLOCKQUOTE>.*?<\/BLOCKQUOTE>//isg;

my $count = 0;
open (MENU, ">$outdir/help.mn") or die "$outdir/help.mn: $!";
print MENU <<EOF;
.LINE
.SUB STATUS NOTIME READONLY LOWPRIORITY
EOF
foreach my $section (split (/<H1/i, $data)) {
  next if ($section =~ /^\s*$/s);
  $section =~ s/^(.*?<\/H1>)//i;
  my $name = "<H1 " . $1;
  $name =~ s/<.*?>//sg;
  $name =~ s/^\s*//s;
  next if ($name =~ /Concept Index/i);
  next if ($name =~ /Short Table of Contents/i);
  next if ($name =~ /About this document/i);
  $count ++;
  $count ++ if (chr (64 + $count) eq 'Q');
  print MENU "\n$name\n";
  print MENU "" . chr (64 + $count) . " C $count.ro\n";
  my $ctx = {};
  html_process ($ctx, $section);
  my $text = html_process ($ctx, undef);
  $text =~ s/\n\s*\[\s*<\s*\]\s*\[\s*>\s*\][^\n]+\n/\n/sg;
  open (FILE, ">$outdir/$count.ro") or die "$outdir/$count.ro: $!";
  print FILE $text;
  close (FILE);
}
close (MENU);


sub html_process {
  my ($ctx, $data) = @_;
  my $processed = 1;

  $ctx->{'out'} = '' if (not defined $ctx->{'out'});
  $ctx->{'buf'} = '' if (not defined $ctx->{'buf'});

  if (not defined $data) {
    html_process ($ctx, '  ');
    $ctx->{'out'} =~ s/\n\n\n+/\n\n/sg;
    $ctx->{'out'} =~ s/\eb\eB/\eB/sg;
    $ctx->{'out'} =~ s/\eu\eU/\eU/sg;
    $ctx->{'out'} =~ s/\eCA\eCC/\eCC/sg;
    $ctx->{'out'} =~ s/\eB\eb//sg;
    $ctx->{'out'} =~ s/\eU\eu//sg;
    $ctx->{'out'} =~ s/\eCC\eCA//sg;
    return $ctx->{'out'};
  }

  $ctx->{'marginl'} = 2   if (not defined $ctx->{'marginl'});
  $ctx->{'marginr'} = 78  if (not defined $ctx->{'marginr'});
  $ctx->{'pos'} = 0       if (not defined $ctx->{'pos'});
  $ctx->{'lastnospc'} = 1 if (not defined $ctx->{'lastnospc'});
  $ctx->{'bold'} = 0      if (not defined $ctx->{'bold'});
  $ctx->{'italic'} = 0    if (not defined $ctx->{'italic'});
  $ctx->{'underline'} = 0 if (not defined $ctx->{'underline'});
  $ctx->{'kbd'} = 0       if (not defined $ctx->{'kbd'});
  $ctx->{'code'} = 0      if (not defined $ctx->{'code'});
  $ctx->{'donep'} = 1     if (not defined $ctx->{'donep'});
  $ctx->{'pre'} = 0       if (not defined $ctx->{'pre'});

  $ctx->{'buf'} .= $data;

  return if (($ctx->{'buf'} =~ /^\s*$/s) && (not $ctx->{'pre'}));

  while ($processed) {
    $processed = 0;
    if ($ctx->{'buf'} =~ s/^([^<]+)//s) {
      html_process__text ($ctx, $1);
      $processed = 1;
    }
    if ($ctx->{'buf'} =~ s/^<(.+?)>//s) {
      html_process__tag ($ctx, $1);
      $processed = 1;
    }
    return if (($ctx->{'buf'} =~ /^\s*$/s) && (not $ctx->{'pre'}));
  }
}


sub html_process__text {
  my ($ctx, $text) = @_;

  HTML::Entities::decode ($text);
  $text =~ s/^([A-Za-z]+:)/\ea$1/mg;

  if ($ctx->{'pre'}) {
    $ctx->{'donep'} = 0;
    $ctx->{'out'} .= $text;
    return;
  }

  $text =~ s/\n/ /sg;
  return if ($text =~ /^\s*$/s);
  $text =~ s/\s+/ /sg;

  $ctx->{'lastnospc'} = 0 if (($text =~ /^\s+/s) && (not $ctx->{'donep'}));
  $ctx->{'donep'} = 0;

  if ($ctx->{'pos'} < $ctx->{'marginl'}) {
    $ctx->{'out'} .= " " x ($ctx->{'marginl'} - $ctx->{'pos'});
    $ctx->{'pos'} = $ctx->{'marginl'};
  }

  foreach my $word (split (/\s+/s, $text)) {
    next if ($word =~ /^$/);
    $ctx->{'pos'} += length $word;
    $ctx->{'pos'} ++ if (not $ctx->{'lastnospc'});
    if ((not $ctx->{'lastnospc'}) && ($ctx->{'pos'} > $ctx->{'marginr'})) {
      $ctx->{'out'} .= "\n" . (" " x $ctx->{'marginl'});
      $ctx->{'pos'} = $ctx->{'marginl'} + length $word;
    } else {
      $ctx->{'out'} .= ' ' if (not $ctx->{'lastnospc'});
      $ctx->{'lastnospc'} = 0;
    }
    $ctx->{'out'} .= "\eB" if (($ctx->{'bold'}) || ($ctx->{'italic'}));
    $ctx->{'out'} .= "\eU" if ($ctx->{'underline'});
    $ctx->{'out'} .= "[\eB" if ($ctx->{'kbd'});
    $ctx->{'out'} .= "\eCC" if ($ctx->{'code'});
    $ctx->{'out'} .= $word;
    $ctx->{'out'} .= "\eCA" if ($ctx->{'code'});
    $ctx->{'out'} .= "\eb]" if ($ctx->{'kbd'});
    $ctx->{'out'} .= "\eu" if ($ctx->{'underline'});
    $ctx->{'out'} .= "\eb" if (($ctx->{'bold'}) || ($ctx->{'italic'}));
    $ctx->{'pos'} += 2 if ($ctx->{'kbd'});
  }

  $ctx->{'lastnospc'} = 1 if ($text !~ /\s+$/s);
}


sub html_process__tag {
  my ($ctx, $tag) = @_;
  my ($on, $tagname) = (1, undef);

  $on = 0 if ($tag =~ s/^\///s);
  $tagname = uc ($1) if ($tag =~ s/^([A-Za-z0-9]+)\s*//s);
  return if (not defined $tagname);

  if (($tagname eq 'B') or ($tagname eq 'STRONG')) {
    $ctx->{'bold'} = $on;
  } elsif ($tagname eq 'U') {
    $ctx->{'underline'} = $on;
  } elsif (($tagname eq 'I') or ($tagname eq 'EM')) {
    $ctx->{'italic'} = $on;
  } elsif ($tagname eq 'CODE') {
    $ctx->{'code'} = $on;
  } elsif ($tagname eq 'KBD') {
    $ctx->{'kbd'} = $on;
  } elsif ($tagname eq 'BR') {
    $ctx->{'lastnospc'} = 1;
    $ctx->{'donep'} = 0;
    $ctx->{'out'} .= "\n";
    $ctx->{'pos'} = 0;
  } elsif ($tagname eq 'P') {
    return if (not $on);
    $ctx->{'out'} .= "\n\n" if (not $ctx->{'donep'});
    $ctx->{'lastnospc'} = 1;
    $ctx->{'donep'} = 1;
#   $ctx->{'marginl'} = 2;
#   $ctx->{'marginr'} = 78;
    $ctx->{'pos'} = 0;
  } elsif ($tagname eq 'PRE') {
    $ctx->{'donep'} = 0;
    $ctx->{'pre'} = $on;
  } elsif ($tagname =~ /^H[1-6]/s) {
    if (not $on) {
      $ctx->{'out'} .= "\n\n" if (not $ctx->{'donep'});
      $ctx->{'lastnospc'} = 1;
      $ctx->{'donep'} = 1;
      $ctx->{'marginl'} = 2;
      $ctx->{'marginr'} = 78;
      $ctx->{'pos'} = 0;
      return;
    }
    $ctx->{'out'} .= "\n" if (not $ctx->{'donep'});
    $ctx->{'lastnospc'} = 1;
    $ctx->{'donep'} = 1;
    if ($tagname =~ /^H[12]/s) {
      $ctx->{'out'} .= "\nMessage:\n\nSubject: ";
      $ctx->{'marginl'} = 7;
      $ctx->{'marginr'} = 79;
      $ctx->{'pos'} = 7;
    } else {
      $ctx->{'out'} .= "\n";
      $ctx->{'marginl'} = 0;
      $ctx->{'marginr'} = 79;
      $ctx->{'pos'} = 0;
    }
  } elsif (($tagname eq 'DIR') or ($tagname eq 'UL')) {
    $ctx->{'out'} .= "\n\n" if (not $ctx->{'donep'});
    $ctx->{'lastnospc'} = 1;
    $ctx->{'donep'} = 1;
    $ctx->{'marginl'} += ($on ? 1 : -1) * 2;
    $ctx->{'marginr'} = 78;
    $ctx->{'pos'} = 0;
  } elsif ($tagname eq 'LI') {
    return if (not $on);
    $ctx->{'lastnospc'} = 1;
    $ctx->{'out'} .= "\n" if (not $ctx->{'donep'});
    $ctx->{'donep'} = 0;
    $ctx->{'out'} .= "" . (" " x ($ctx->{'marginl'} - 2)) . "* ";
    $ctx->{'pos'} = $ctx->{'marginl'};
  } elsif ($tagname eq 'DL') {
    $ctx->{'origmarginl'} = $ctx->{'marginl'} if ($on);
    $ctx->{'marginl'} = $ctx->{'origmarginl'} if (not $on);
    if (not $on) {
      $ctx->{'out'} .= "\n\n" if (not $ctx->{'donep'});
      $ctx->{'lastnospc'} = 1;
      $ctx->{'donep'} = 1;
      $ctx->{'pos'} = 0;
    }
  } elsif ($tagname eq 'DT') {
    return if (not $on);
    $ctx->{'out'} .= "\n";
    $ctx->{'donep'} = 0;
    $ctx->{'lastnospc'} = 1;
    $ctx->{'marginl'} = $ctx->{'origmarginl'};
    $ctx->{'pos'} = 0;
  } elsif ($tagname eq 'DD') {
    if ($on) {
      $ctx->{'marginl'} = 6 + 6*int(($ctx->{'pos'} + 2) / 6);
      $ctx->{'donep'} = 1;
      $ctx->{'lastnospc'} = 1;
    } else {
      $ctx->{'marginl'} = $ctx->{'origmarginl'};
    }
  }
}

# EOF
