#!/usr/bin/perl

# This small script corrects some problems existing with the
# HTML generated by tex4ht (tex4ht version was 1999-07-18-00-02).

use strict;
use warnings;

# read_joined_input($fh)
#
# Reads every line from the filehandle $fh and returns them joined into a
# single string, each kept line prefixed by one space (so a non-empty
# result always starts with a space).
#
# Joining everything into one string is intended to avoid things like
#     [quote]<tag1>[newline]<tag2>[text]
# which is rendered with a space between [quote] and [text], which is not
# desirable (scope: w3m, maybe opera and others).
#
# Lines containing only spaces and tabs are dropped.
# (WARN: this may not be correct for preformatted text.)
sub read_joined_input
  {
    my ($fh) = @_;
    my $joined = "";

    # The explicit defined() keeps a final line consisting of just "0"
    # (without a trailing newline) from being mistaken for end of input.
    while (defined(my $current_line = <$fh>))
      {
        # chomp (not chop): only strip a line terminator that is really
        # there, so a last line without a newline keeps all its characters.
        chomp $current_line;

        next if ($current_line =~ /^[ \t]*$/);

        $joined .= " " . $current_line;
      }

    return $joined;
  }

# fix_tex4ht_markup($text)
#
# Applies the tex4ht HTML corrections to $text and returns the corrected
# copy; the argument itself is not modified.
sub fix_tex4ht_markup
  {
    my ($text) = @_;

    # Replace the numeric character references for the typographic single
    # and double quotes (&#8216; &#8217; &#8220; &#8221;) by plain ASCII
    # that can be understood by almost any browser.
    $text =~ s/&#8216;/`/g;
    $text =~ s/&#8217;/'/g;
    $text =~ s/&#8220;/``/g;
    $text =~ s/&#8221;/''/g;

    # Correct URLs: the href=URL still contains '\&' but it should be
    # '&amp;'. However, the text of the URLs is ok.
    $text =~ s/\\&/&amp;/g;

    # Removing these spaces is needed so that single (and maybe also
    # double) quotes have no space between them and the text they enclose
    # (the problem exists only if the quoted text contains tags,
    # example: '<font color=...> <tt>TEXT<tt> </font>').
    $text =~ s/> *</></g;
    $text =~ s/ <\//<\//g;

    return $text;
  }

# Main: filter STDIN to STDOUT as one single output line. Nothing is
# printed when the input contained no non-blank line.
my $Line = fix_tex4ht_markup(read_joined_input(\*STDIN));

print "$Line\n" if ($Line !~ /^[ \t]*$/);

