#!/usr/bin/perl
# vim:expandtab sts=4 ts=4 shiftwidth=4 number :

#    gladtex: Reads a 'htex' file (html with LaTeX maths embedded in <EQ></EQ>)
#             and produces html with equations substituted by images.
#    Project homepage at http://gladtex.sourceforge.net
#    Copyright (C) 1999-2010 Martin G. Gulbrandsen
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

# note: the utility 'eqn2img' should accompany this script,
#       and must be callable from this script.

use warnings;
use strict;

use IPC::Open2;
use Cwd;
use Getopt::Std;
use Storable;
use File::Spec;

# Built-in defaults.  Most of them are replaced further down when the
# corresponding command line option is given.

# Directory the equation images, desc.html and gladtex.cache live in (-d).
our $img_dir         = ".";             # default values
# The image directory exactly as the user spelled it; used to build the
# link from the generated HTML to desc.html.
our $img_dir_relative = ".";
# Resolution handed to eqn2img (-r).
our $dpi             = 100;
# Oversampling factor handed to eqn2img (-s).
our $supersample     = 4;
# Image format, also used as the file extension of the images (-f).
our $format          = "png";
# Set by -v; enables progress messages and is passed on to eqn2img.
my $verbose         = 0;
# Foreground (-c) and background (-b) colours handed to eqn2img.
our $foreground      = "000000";
our $background      = "A0A0A0";
# Cleared by -t; when false, "-t" is added to the eqn2img options.
our $transparency    = 1;
# LaTeX environment the equation is embedded in (-e).
our $environment     = "displaymath";
# CSS class put on the <img> when the environment is "displaymath" (-l).
our $blockMathClass  = "displaymath";
# CSS class put on the <img> when the environment is "math" (-i).
our $inlineMathClass = "inlinemath";

# LaTeX preamble handed to eqn2img; -p appends further lines to it.
my $preamble = "
\\usepackage{amsmath}
\\usepackage{amssymb}
";

# Help text printed when gladtex is started without any input file.
# The string is interpolated right here, so the defaults it mentions
# ($background, $foreground, $environment, $dpi, $supersample) are the
# built-in values assigned above, never values given on the command line.
our $usage = "gladtex version 1.2, Copyright (C) 1999-2010 Martin G. Gulbrandsen

gladtex comes with ABSOLUTELY NO WARRANTY. This is free software,
    and you are welcome to redistribute it under certain conditions;
see the file COPYING for details.

Project homepage at http://gladtex.sourceforge.net

Usage: gladtex [OPTION]... [FILE]...
Convert htex file (HTML with LaTeX equations) to html with images

-a         save text alternatives for images which are too long for the alt
           attribute into a single separate file and link images to it
-b colour   set background RGB colour ($background by default)
-c colour   set foreground RGB colour ($foreground by default)
-d path     store image files in 'path' (current directory is default), affects
            also place of image alternatives
-e env      embed LaTeX code in \\begin{env}..\\end{env} ($environment by default)
-f format   store images in 'format' (png by default)
-i class    CSS class to assign to inline math (default: 'inlinemath')
-l class    CSS class to assign to block-level math (default: 'displaymath')
-p string   add 'string' to LaTeX preamble (e.g. \\usepackage{...})
-r dpi      set resolution (size of images) to 'dpi' ($dpi by default)
-s n        set oversampling factor for antialiasing to 'n' ($supersample by default)
-t          turn transparency OFF
-u url      url to image files above (relative links are default)
-v          print verbose information

Output files:
*.html      copy of input FILE(s), with <EQ>..</EQ> tags substituted
filename is same as input file, with the extension replaced
eqn???.png  equation images (png extension is example only, see -f option)

Input files may not have .html extension, .htex is required.
";

$| = 1;    # flush output after every print, causes better feedback with -v flag
# Base name (without extension) tried for the next equation image; the main
# loop advances it with the string increment operator until the name is free.
my $img_name = "eqn000";

# --- sub: rel_name ---
# usage: rel_name $src, $dest;
# Both arguments are expected to be absolute, '/'-separated paths.
# returns: the relative path that leads from $src to $dest.  Every path
#          component of the result is followed by '/', so the result is
#          either empty (same directory) or ends in '/'.
sub rel_name {
    my ( $from, $to ) = @_;
    my @from_parts = split m{/}, $from;
    my @to_parts   = split m{/}, $to;

    # count the leading components both paths have in common
    my $common = 0;
    while ( $common < @from_parts
        and $common < @to_parts
        and $from_parts[$common] eq $to_parts[$common] )
    {
        $common++;
    }

    # climb out of whatever is left of $src ...
    my $relative = "../" x ( @from_parts - $common );

    # ... and descend into whatever is left of $dest
    foreach my $part ( @to_parts[ $common .. $#to_parts ] ) {
        $relative .= "$part/";
    }

    return $relative;
}

# --- sub: full_name ---
# usage: full_name $src, $dest
# $src should be some absolute path
# $dest may be relative (as seen from $src) or absolute
# returns absolute, normalised path to $dest, as seen from $src:
#   - empty components ('//') and '.' components are dropped,
#   - 'name/..' pairs are collapsed ('..' above the root stays at the root),
#   - there is no trailing '/', except that the root itself is returned as '/'.
# The path is resolved purely textually; symbolic links are not looked at.
sub full_name {
    my $src  = shift;
    my $dest = shift;

    # if not absolute path, prepend $src
    $dest = "$src/$dest" unless $dest =~ m{^/};

    # Work on whole path components.  Plain substitutions on the string are
    # not safe here: removing every './' would also hit the tail of '../'
    # and of directory names that merely end in a dot.
    my @parts;
    foreach my $part ( split m{/+}, $dest ) {
        next if $part eq '' or $part eq '.';
        if ( $part eq '..' ) {
            pop @parts;    # no-op when already at the root
            next;
        }
        push @parts, $part;
    }

    return '/' . join( '/', @parts );
}

# --- Parse command line options ---

our %options = ();
getopts( 'f:r:s:d:u:vtc:b:p:e:l:i:a', \%options );

# Nothing to convert: show the help text and the formats eqn2img offers.
unless (@ARGV) {
    print $usage;
    print `eqn2img -f?`;    # this prints list of supported formats
    exit;
}

# Options whose (true) value simply replaces the built-in default.
foreach my $override (
    [ d => \$img_dir ],
    [ r => \$dpi ],
    [ s => \$supersample ],
    [ f => \$format ],
    [ v => \$verbose ],
    [ c => \$foreground ],
    [ b => \$background ],
    [ e => \$environment ],
    [ l => \$blockMathClass ],
    [ i => \$inlineMathClass ],
  )
{
    my ( $flag, $target ) = @{$override};
    ${$target} = $options{$flag} if $options{$flag};
}

# -t switches transparency off, -p adds one more line to the preamble.
$transparency = 0 if $options{t};
$preamble .= "$options{p}\n" if $options{p};

our $url;
our %history;

# -u: base URL for the images, always stored with a trailing slash.
if ( $options{u} ) {
    $url = $options{u} =~ m{/$} ? $options{u} : "$options{u}/";
    print "\nWarning: -u option present, but no -d\n\n" unless $options{d};
}

# just do something with $options{t} to avoid 'possible typo' warning
$options{t} = 0;

# todo: add validation of options

# With -d (and no -u) remember the directory as typed, make it absolute and
# create it when it does not exist yet.
# NOTE(review): with -u the directory is neither made absolute nor created
# here -- confirm that this is intended.
if ( $options{d} and !defined $url ) {
    $img_dir_relative = $img_dir;
    $img_dir          = full_name( getcwd(), $img_dir );
    -d $img_dir
      or mkdir($img_dir)
      or die "Unable to create $img_dir.\n";
}

# read in cached history
# The cache maps each equation to the options it was rendered with, the image
# file name and the image dimensions.  It is only read here when -d was given.
if ( $options{d} and -r "$img_dir/gladtex.cache" ) {
    print "Retrieving cache\n" if $verbose;

    # retrieve() dies on a damaged file and returns undef on I/O problems
    # (which makes the dereference die); both end up in the recovery branch.
    my $cache_loaded = eval {
        %history = %{ retrieve("$img_dir/gladtex.cache") };
        1;
    };
    if ( !$cache_loaded ) {
        # if it exists, but was not readable, just remove it and start from the
        # beginning
        print "Old GladTeX cache was not readable, removed; also removing all equations.\n";

        opendir( my $img_dh, $img_dir ) or die "Can't open directory: $!\n";
        my @stale_images = grep { /^eqn/ } readdir $img_dh;
        closedir $img_dh;

        foreach my $fn (@stale_images) {
            my $stale_path = File::Spec->catfile( $img_dir, $fn );
            print "Remove $stale_path\n" if $verbose;

            # unlink does not die, so its return value has to be checked
            unlink $stale_path or warn "Cannot remove $stale_path: $!\n";
        }
        unlink( "$img_dir/gladtex.cache" );
        %history = ();
    }
}

# empty desc.html, if used:
# With -a the long text alternatives are collected in desc.html inside the
# image directory; start it afresh with just the opening HTML.
if ( $options{a} ) {
    my $desc_path = File::Spec->catfile( $img_dir, 'desc.html' );
    open( my $desc_fh, ">", $desc_path )
      or die "Cannot create $desc_path: $!\n";
    print {$desc_fh} "<html><body>\n";
    close($desc_fh) or die "Cannot write $desc_path: $!\n";
}

# becomes true as soon as one long description has been written to desc.html
my $desc_file_used = 0;

# --- Process input files ---

print "Processing ", $#ARGV + 1, " files\n" if $verbose;

# Directory gladtex was started from; relative input paths are resolved
# against it because the main loop changes the working directory.
our $startup_cwd = getcwd();

# State used by the main loop below.  It is declared here, at file level,
# because "use strict" demands a declaration and an older version of the
# script had none.
#   $file, $directory, $basename, $extension, $full_dir: the input file being
#       processed and the pieces of its path
#   $eqn2img_opt: option string for eqn2img, also stored in the cache
#   $img_src: prefix put in front of the image name in the src attribute
#   $dimensions: the line eqn2img prints for a rendered image
#   $pid: process id of the running eqn2img
#   $id_number: counter used for the anchors in desc.html
our ( $file, $directory, $basename, $full_dir );
my ( $extension, $eqn2img_opt );
my ( $img_src,   $dimensions );
my $pid;
my $id_number=0;

# Working copies of the settings, initialised from the defaults / command
# line values.  The main loop changes them according to the attributes found
# in an <eq> tag and hands them to eqn2img.
our $this_preamble    = $preamble;
our $this_foreground  = $foreground;
our $this_background  = $background;
our $this_environment = $environment;
our $this_dpi         = $dpi;
our $this_css_class   = $inlineMathClass;

# Convert every input file: copy its text to <basename>.html (next to the
# input file) and replace each <eq ...>...</eq> element by an <img> tag that
# points at the rendered equation.  Images are rendered by the external
# program eqn2img and reused through %history when the same equation with the
# same options has been seen before.
foreach $file (@ARGV) {
    ( $directory, $basename, $extension ) =
      $file =~ /(.*?)\/*([^\/]*?)\.([^\/]*)$/;
    $directory or $directory = ".";
    $basename  or $basename  = "noname";
    $extension or $extension = "htex";
    $extension eq "html"
      and die "Don't use .html extension, .htex is required.";

    # Work inside the directory of the input file.  Without -d the cache is
    # kept per directory, so it is written out before leaving the old one.
    $full_dir = full_name( $startup_cwd, $directory );
    if ( getcwd() ne $full_dir ) {
        if ( !$options{d} and %history ) {
            print "Storing cache\n" if $verbose;
            store( \%history, "gladtex.cache" );
            undef %history;
        }

        chdir $full_dir
          or die "Cannot change to directory $full_dir: $!\n";
        $img_name = "eqn000" if !$options{d};
    }
    if ( !$options{d} and !%history ) {
        if ( -r "gladtex.cache" ) {
            print "Retrieving cache\n" if $verbose;
            %history = %{ retrieve("gladtex.cache") };
        }
    }

    # three-argument open: the file name can never be taken for a mode
    open( my $input, '<', "$basename.$extension" )
      or die "Cannot open $file";
    open( my $output, '>', "$basename.html" )
      or die "Cannot open $basename.html";

    print "\n$file -> $basename.html\n" if $verbose;

    my $start_line = 0;
    my $end_line   = 0;

    while ( defined( my $line = <$input> ) ) {
        $start_line++;
        $end_line = $start_line;

        # search for <eq> tag (the s option is needed to avoid loosing linebreak at end of line)
        while ( my ( $leading, $tag_attrs, $rest ) =
            ( $line =~ /(.*?)<eq(.*?)>(.*)/is ) )
        {
            my $equation = "";
            my $alt_equation;
            my $alt_long_equation = "";
            my $append_space      = 0;

            print {$output} $leading;    # everything before <eq> tag
            $line = $rest;               # the rest

            # Attributes of an <eq> tag apply to that equation only, so every
            # equation starts out with the global settings again.
            $this_preamble    = $preamble;
            $this_foreground  = $foreground;
            $this_background  = $background;
            $this_environment = $environment;
            $this_dpi         = $dpi;
            $this_css_class   = $inlineMathClass;

            # scan options within <eq> tag
            while (
                $tag_attrs =~ /\s*?(\S*?)=\s*(\"(.*?)\"|\'(.*?)\'|(\S*))/g )
            {    # should whitespace be allowed around equal sign?
                my $key = $1;

                # only one of these will be defined
                my $value;
                $value = $3 if defined $3;    # "value"
                $value = $4 if defined $4;    # 'value'
                $value = $5 if defined $5;    # value (no quotation marks)

                # may add more options here when needed..
                if    ( $key =~ /^preamble/i ) { $this_preamble .= "$value\n" }
                elsif ( $key =~ /^color/i )    { $this_foreground  = $value }
                elsif ( $key =~ /^bgcolor/i )  { $this_background  = $value }
                elsif ( $key =~ /^env/i )      { $this_environment = $value }
                elsif ( $key =~ /^dpi/i )      { $this_dpi         = $value }
            }

            if ( $this_environment eq "displaymath" ) {
                $this_css_class = $blockMathClass;
            }
            elsif ( $this_environment eq "math" ) {
                $this_css_class = $inlineMathClass;
            }

            # read equation until </eq> is found
            # The text after </eq> is captured greedily and without a '$'
            # anchor so that the line break at the end of the line survives.
            my ( $before, $after );
            while (
                not( ( $before, $after ) = ( $line =~ /(.*?)<\/eq>(.*)/is ) )
              )
            {
                $equation .= $line;
                if ( eof($input) ) {
                    print STDERR "Closing tag </eq> not found in equation started at line $start_line\n";

                    # todo: cleanup
                    exit 1;
                }
                $line = <$input>;
                $end_line++;
            }
            $equation .= $before;    # everything before </eq>

            # nothing at all after </eq> (end of input): separate the image
            # from whatever follows with a blank
            if ( $after eq "" ) {
                $append_space = 1;
            }
            $line = $after;

            # save equation for later use in alt tag / outsourced description
            $alt_equation = $equation;

            # replace " in alt tag:
            $alt_equation =~ s/"/&quot;/g;
            if ( length($alt_equation) > 80 and $options{a} ) {
                $alt_equation      = substr( $alt_equation, 0, 76 ) . " ...";
                $alt_long_equation = $equation;
                $id_number++;
            }

            # strip whitespace: this makes 'history' stronger and removes linebreak
            # trouble (a paragraph can't end within $$..$$ in latex)
            $equation =~ s/\s+/ /g;

            # Resolve entities in the equation, since the document
            # ought to be well-formed HTML.
            $equation =~ s/&gt;/>/g;
            $equation =~ s/&lt;/</g;

            print "Processing equation at line(s) $start_line to $end_line:\n"
              if $verbose;

            if ( $options{u} ) {
                $img_src = $url;
            }
            elsif ( $options{d} ) {
                $img_src = rel_name( getcwd(), $img_dir );
            }
            else {
                $img_src = "";
            }

            # This string is the cache key for the options, so its format is
            # kept as it always was.  The program itself is started with the
            # argument list below, which needs no shell quoting.
            $eqn2img_opt =
"-e $this_environment -r $this_dpi -p '$this_preamble' -c $this_foreground -b $this_background -f $format -s $supersample "
              . ( $transparency ? ""    : "-t " )
              . ( $verbose      ? "-v " : "" );

            my @eqn2img_args = (
                '-e', $this_environment,
                '-r', $this_dpi,
                '-p', $this_preamble,
                '-c', $this_foreground,
                '-b', $this_background,
                '-f', $format,
                '-s', $supersample,
                ( $transparency ? () : ('-t') ),
                ( $verbose ? ('-v') : () ),
            );

            # --- process the latex code in $equation ---
            # recycle image if the same equation has appeared before with the
            # same options (colors etc.)
            if (    $history{$equation}
                and $history{$equation}->{"opt"} eq $eqn2img_opt )
            {
                print "Reusing image\n" if $verbose;
            }
            else {
                # never overwrite an image
                while ( -e "$img_dir/$img_name.$format" ) { $img_name++ }
                print "$img_dir/$img_name: " if $verbose;

                # List form of open2: no shell is involved, so quotes or
                # wildcard characters in the preamble, the environment name
                # or the image path are passed to eqn2img untouched.
                my ( $eqn2img_out, $eqn2img_in );
                $pid = open2( $eqn2img_out, $eqn2img_in,
                    'eqn2img', @eqn2img_args,
                    '-o', "$img_dir/$img_name.$format" );
                print {$eqn2img_in} $equation;
                close $eqn2img_in;
                $dimensions = <$eqn2img_out>;

                # close seems not to set $? when using open2, hence waitpid
                waitpid $pid, 0;
                if ($?) {
                    print STDERR
"Error processing equation starting at line $start_line:\n",
                      $equation, "\n";
                    print STDERR "See ${img_dir}/${img_name}.log for details\n";
                    exit 1;
                }
                close $eqn2img_out;

                print ", done.\n" if $verbose;
                $history{$equation}->{"opt"} = $eqn2img_opt;
                $history{$equation}->{"img"} = "$img_name.$format";
                $history{$equation}->{"dim"} = $dimensions;
            }

            # a long alternative text was moved to desc.html: wrap img in a
            # link to it
            my $link_to_desc = ( $alt_long_equation ne "" and $options{a} );
            if ($link_to_desc) {
                print {$output} "<a href=\""
                  . File::Spec->catdir( $img_dir_relative, 'desc.html' ) . "#"
                  . $id_number . "\">";
            }
            print {$output} " <img class=\""
              . $this_css_class
              . "\" src=\""
              . $img_src
              . $history{$equation}->{"img"} . "\" "
              . $history{$equation}->{"dim"}
              . " alt=\""
              . $alt_equation . "\" />";
            if ($append_space) {
                print {$output} " ";
            }

            # add </a>
            if ($link_to_desc) {
                print {$output} "</a>\n";
            }

            # write alt_long_equation out, if appropriate
            if ($link_to_desc) {
                $desc_file_used = 1;
                my $desc_path = File::Spec->catfile( $img_dir, 'desc.html' );
                open( my $desc_fh, ">>", $desc_path )
                  or die "Cannot append to $desc_path: $!\n";
                print {$desc_fh} "<p><a name=\"" . $id_number
                  . "\"/><pre>$alt_long_equation</pre></p><hr/>\n";
                close($desc_fh) or die "Cannot write $desc_path: $!\n";
            }
        }
        print {$output} $line;

        $start_line = $end_line;
    }

    # finish this file before the next one is opened; errors of buffered
    # writes only show up when the handle is closed
    close($input);
    close($output) or die "Cannot write $basename.html: $!\n";
}

# --- sub: Shutdown ---
# usage: Shutdown();
# Final bookkeeping after all input files have been processed:
#   - with -a, either terminates desc.html or, when no long description was
#     written, removes the stub that was created at startup,
#   - stores the equation cache as gladtex.cache in the image directory,
#   - removes the image directory if nothing is left in it.
# Dies if the image directory cannot be read.
sub Shutdown {
    ### write out data
    ## desc.html only exists because of -a; without -a a file of that name
    ## in the image directory is not ours and is left alone
    if ( $options{a} ) {
        my $desc_path = File::Spec->catfile( $img_dir, 'desc.html' );
        if ($desc_file_used) {
            ## write out HTML ending for desc.html
            # only warn on failure so that the cache below is still stored
            if ( open( my $desc_fh, ">>", $desc_path ) ) {
                print {$desc_fh} "\n</body></html>";
                close($desc_fh) or warn "Cannot write $desc_path: $!\n";
            }
            else {
                warn "Cannot append to $desc_path: $!\n";
            }
        }
        else {
            ## remove desc.html, if empty
            unlink($desc_path);
        }
    }

    ## write out cache, if apppropriate
    # write history cache
    if (%history) {
        print "Storing cache\n" if $verbose;
        store( \%history, File::Spec->catfile( $img_dir, "gladtex.cache" ) );
    }

    # if no gladtex cache was written, no equations and no desc.html, the
    # img_dir will be empty, remove it
    opendir( my $img_dh, $img_dir ) or die "Cant open $img_dir: $!\n";
    my @entries = grep { !/^[.][.]?\z/ } readdir $img_dh;
    closedir $img_dh;
    if ( !@entries ) {
        rmdir($img_dir);
    }
    return;
}

Shutdown();
