#!/usr/bin/perl -i.bak

use English;

use vars qw($EASYLATEXSTORE_RESERVED_WORD @MATH_MODE_SYMBOLS $MATH_MODE_SYMBOL_FILEPATH);

# Marker word from which store() builds its placeholder tokens and which the
# auto-math passes use to recognise text that has already been stored.
$EASYLATEXSTORE_RESERVED_WORD = 'EASYLATEXSTORE';

# The symbol list is looked up inside the directory given as the second
# command-line argument.
$MATH_MODE_SYMBOL_FILEPATH = join('/', $ARGV[1], 'math_mode_symbol_list.txt');

# Fill @MATH_MODE_SYMBOLS and @MATH_MODE_BINARY_SYMBOLS before any pass runs.
loadMathModeSymbols();







# Placeholder storage shared by store(), unstoreMathModes() and the
# processAdjParens* helpers: %Stored maps a numeric id to the protected text,
# and $StorageIndex is the next id store() will hand out.
# The storage is a hash everywhere it is used, so %Stored is what must be
# declared.  @Stored is not referenced anywhere in this file; it stays
# declared only for backward compatibility -- NOTE(review): confirm nothing
# external names it, then drop it.
use vars qw($StorageIndex %Stored @Stored);

$StorageIndex = 0;


## get input
# With the input record separator undefined, the single <> read below returns
# the whole of the first input file as one string.
undef $INPUT_RECORD_SEPARATOR;
$file = <>;


## transform it
# Math that is already present is swapped for placeholders first.  After that
# every auto-math pass is followed by another storeMathModes() call so that
# the math the pass just created is hidden from the passes that come later.
$file = storeMathModes($file);

for my $pass (
    \&autoMathModeEqnArray,
    \&autoMathModeEqnArrayNoEqualityInFirstEquation,
    \&autoMathModeLeftRight,
    \&autoMathModeFrac,
    \&autoMathModeSqrt,
    \&autoMathModeMathbbEtc,
    \&autoMathModeSuperOrSubscript,
    \&autoMathModeBinarySymbols,
    # done twice to cover this case: (f \circ g)^{-1}
    # and this one: a_4 = 2^2
    \&autoMathModeSuperOrSubscript,
    \&autoMathModeSimpleSymbols,
) {
    $file = $pass->($file);
    $file = storeMathModes($file);
}

# Pull parentheses that touch a placeholder into the stored text, turn the
# escaped "\=" back into "=", and finally put all stored text back.
$file = includeAdjacentParens($file);
$file = autoMathModeUnEscapeEqualsSigns($file);
$file = unstoreMathModes($file);


## emit the result
print $file;

# Put every "\left<delim> ... \right<delim>" span (shortest match, may cross
# line breaks) on its own inside an align* environment.
# Takes the document text and returns the rewritten text.
sub autoMathModeLeftRight {
    my $text = shift;

    $text =~ s/(\\left\W.*?\\right\W)/\n\\begin{align*}\n$1\n\\end{align*}\n/gs;

    return $text;
}


# Surround each "\frac{...}{...}" whose two arguments contain no closing
# brace with "$" signs.  Takes the document text, returns the rewritten text.
sub autoMathModeFrac {
    my $text = shift;

    $text =~ s/(\\frac{[^}]*}{[^}]*})/\$$1\$/g;

    return $text;
}


# Surround each "\sqrt{...}" whose argument contains no closing brace with
# "$" signs.  Takes the document text, returns the rewritten text.
sub autoMathModeSqrt {
    my $text = shift;

    $text =~ s/(\\sqrt{[^}]*})/\$$1\$/g;

    return $text;
}


# Surround each "\mathbb{...}" and each "\mathcal{...}" whose argument
# contains no closing brace with "$" signs.
# Takes the document text, returns the rewritten text.
sub autoMathModeMathbbEtc {
    my $text = shift;

    for ($text) {
        s/(\\mathbb{[^}]*})/\$$1\$/g;
        s/(\\mathcal{[^}]*})/\$$1\$/g;
    }

    return $text;
}


# Find subscript ("base_arg") and superscript ("base^arg") expressions in the
# text and replace each match with the result of processSuperOrSubscript(),
# which re-wraps the matched fragment in "$" signs.
#
# Argument: the document text (already-stored math appears as placeholders).
# Returns:  the rewritten text.
#
# The patterns are assembled from string fragments.  In the single-quoted
# fragments '\\\\' is the two-character regex \\ (one literal backslash) and
# \w reaches the regex engine unchanged.
sub autoMathModeSuperOrSubscript {
    my ($file) = @_;

#    print STDERR "superOrSub got $file\n";

    ### some regular expression "primitives"
    # a base: an optional (not itself escaped) backslash followed by a word
    # or by any single character
    my $word_OrCharacterRE = '((?<!\\\\)\\\\?(?:(\w+|.)))';
    # same, but the word part may not contain underscores
    my $wordWithoutUnderscores_OrCharacterRE = '((?<!\\\\)\\\\?(?:((?!_)\w)+|.))';

    # the operators themselves, only when not preceded by a backslash
    my $underscoreWhichIsNotEscapedRE = '(?<!\\\\)_';
    my $carrotWhichIsNotEscapedRE = '(?<!\\\\)\\^';
 
    # the argument: a {...} group without a nested "{", or one character;
    # either way it must not be followed by a word character.
    # NOTE(review): "(?!=\\\\)" is a negative lookahead for the two characters
    # "=\" placed directly before "}", so it can never fail.  It looks like a
    # typo for the lookbehind "(?<!\\\\)" (unescaped closing brace) -- confirm.
    my $bracketsWithoutNestingOrIsolatedCharacterRE = '((?<!\\\\){[^{]*?(?!=\\\\)}|.)(?!\w)';

    
    ### parts of the regular expression for finding subscripts
    # "(?!EASYLATEX)" keeps a match from starting at the beginning of a
    # placeholder token.
    # NOTE(review): this string is double-quoted, and Perl does not treat \w
    # as an escape there, so the string actually contains "(?<!w)" -- a
    # lookbehind for the literal letter "w", not for a word character.
    # "(?<!\w)" was presumably intended, but correcting it changes which
    # bases match (e.g. a base that follows a placeholder or a word
    # character), so verify against real documents before changing it.
    my $subscriptBaseRE = "(?<!\w)(?!EASYLATEX)$wordWithoutUnderscores_OrCharacterRE";
    my $subscriptOpRE = $underscoreWhichIsNotEscapedRE;
    my $subscriptArgRE = $bracketsWithoutNestingOrIsolatedCharacterRE;

    my $subscriptRE = $subscriptBaseRE . $subscriptOpRE . $subscriptArgRE;

    ### parts of the regular expression for finding superscripts
    # the base must either follow a character outside [a-zA-Z0-9_] or start
    # with a backslash (here "\\\\" does survive double-quoting as regex \\)
    my $superscriptBaseRE = "(?:(?<=[^a-zA-Z0-9_])|(?=\\\\))(?!EASYLATEX)$word_OrCharacterRE";
    my $superscriptOpRE = $carrotWhichIsNotEscapedRE;
    my $superscriptArgRE = $bracketsWithoutNestingOrIsolatedCharacterRE;

    my $superscriptRE = $superscriptBaseRE . $superscriptOpRE . $superscriptArgRE;

#    $file =~ s/(${subscriptRE})/\$$1\$/g;
#    $file =~ s/(${superscriptRE})/\$$1\$/g;
    # subscripts are handled over the whole text first, then superscripts;
    # $1 is the complete base+operator+argument match
    $file =~ s/(${subscriptRE})/processSuperOrSubscript($1)/ge;
    $file =~ s/(${superscriptRE})/processSuperOrSubscript($1)/ge;

# experimental:
#$file =~ s/${EASYLATEXSTORE_RESERVED_WORD}/processSuperOrSubscript($1)/ge;


    return $file;
}

# Turn one matched sub/superscript fragment into a single inline-math span.
# Any placeholders inside the fragment are expanded back to their stored text
# first, so that the "$" signs of that inner math can be removed; the whole
# fragment is then wrapped in one pair of "$" signs.
# Takes the matched fragment, returns the wrapped fragment.
sub processSuperOrSubscript {
    my $fragment = shift;

    $fragment = unstoreMathModes($fragment);
    $fragment =~ s/\$//g;

    return '$' . $fragment . '$';
}

# Wrap "word SYMBOL word" expressions in "$" signs, where SYMBOL is any entry
# of @MATH_MODE_BINARY_SYMBOLS and the parts are separated by single spaces.
# When one side is a stored-math placeholder, only the other side and the
# symbol are wrapped.
#
# Argument: the document text.
# Returns:  the rewritten text (unchanged when there are no usable symbols).
#
# The symbols are escaped into a local copy: escaping the shared array in
# place would double-escape it on a second call and stop every match.
# Empty entries are dropped because an empty alternative matches anywhere.
sub autoMathModeBinarySymbols {
    my ($file) = @_;

    my @quoted_symbols =
        map { quotemeta($_) }
        grep { defined($_) && length($_) } @MATH_MODE_BINARY_SYMBOLS;
    return $file unless @quoted_symbols;

    my $symbol_list = join('|', @quoted_symbols);

    # plain "x \mapsto y": both operands are ordinary words
    $file =~ s/(?<!\w)((?:(?!${EASYLATEXSTORE_RESERVED_WORD})\w)+ (?:$symbol_list) (?:(?!${EASYLATEXSTORE_RESERVED_WORD})\w)+)(?:(?!\w)|(?=\\))/\$$1\$/g;

    ### now things like "x \mapsto EASYLATEXSTORE" --> "$x \mapsto$ EASYLATEXSTORE"
    $file =~ s/(?<!\w)((?:(?!${EASYLATEXSTORE_RESERVED_WORD})\w)+ (?:$symbol_list))(?=\s*${EASYLATEXSTORE_RESERVED_WORD})/\$$1\$/g;

    ### now things like "EASYLATEXSTORE \mapsto x" --> "EASYLATEXSTORE $\mapsto x$"
    $file =~ s/(?<=${EASYLATEXSTORE_RESERVED_WORD})(\s*)((?:$symbol_list) (?:(?!${EASYLATEXSTORE_RESERVED_WORD})\w)+)(?:(?!\w)|(?=\\))/$1\$$2\$/g;

    return $file;
}

# Wrap every stand-alone occurrence of a symbol from @MATH_MODE_SYMBOLS in
# "$" signs.  A symbol counts as stand-alone when it is not followed by a
# word character and either is not preceded by one or starts with a backslash.
#
# Argument: the document text.
# Returns:  the rewritten text (unchanged when there are no usable symbols).
#
# The symbols are escaped into a local copy: escaping the shared array in
# place would double-escape it on a second call and stop every match.
# Empty entries are dropped because an empty alternative matches anywhere.
sub autoMathModeSimpleSymbols {
    my ($file) = @_;

    my @quoted_symbols =
        map { quotemeta($_) }
        grep { defined($_) && length($_) } @MATH_MODE_SYMBOLS;
    return $file unless @quoted_symbols;

    my $symbol_list = join('|', @quoted_symbols);

    $file =~ s/(?:(?<!\w)|(?=\\))($symbol_list)(?!\w)/\$$1\$/g;

    return $file;
}



# Convert a block of consecutive relation lines into an align* environment.
#
# A relation line has the form "<lhs> <op> <rhs>\n", where <op> is one of
# =, \geq, \leq, <, >, \propto, \approx with a single space on each side and
# neither side contains "=" or a newline.  A run of one or more such lines is
# converted when it starts at the beginning of the text or after an empty
# line and is followed by a newline or the end of the text; the run is
# replaced by "\n\n" . eqnArrayReplaceFn(run) . "\n".
#
# Argument: the document text.   Returns: the rewritten text.
#
# NOTE: this feature may require you to escape some = signs
sub autoMathModeEqnArray {
    my ($file) = @_;

    # Lexical on purpose: these names are reused by other subs in this file
    # and must not leak into package globals.
    my $notEqualsRE = '[^\n=]*';
    # the trailing (?<!\\=) rejects an operator that is an escaped "\="
    my $lineWithOneEqualsRE = "(?:($notEqualsRE) (?:=|\\\\geq|\\\\leq|<|>|\\\\propto|\\\\approx)(?<!\\\\=) ($notEqualsRE)\n)";

    $file =~ s/(?:^|\n\n)($lineWithOneEqualsRE+)(?:\n|$)/"\n\n".eqnArrayReplaceFn($1)."\n"/eg;

    return $file;
}

# Turn a run of relation lines into an align* environment.
#
# Argument: the matched run, every line terminated by "\n".
# Returns the argument unchanged when
#   * it contains the text EASYLATEXSTORE anywhere outside a \textrm{...}
#     group (i.e. part of it is already stored math), or
#   * its first line contains an "=" that is followed by nothing but
#     whitespace up to the end of the argument;
# otherwise returns "\begin{align*}\n", the lines with "\\" in front of every
# line but the first, and "\end{align*}".
sub eqnArrayReplaceFn {
    my ($expr) = @_;

    # Look for placeholders with the \textrm{...} groups taken out, so that
    # stored math inside \textrm does not block the conversion.
    my $testExpr = $expr;
    $testExpr =~ s/\\textrm{.*?}//g;
    return $expr if $testExpr =~ /EASYLATEXSTORE/;

    return $expr if $expr =~ /^.*=\s*$/;

    #  allow multiple \leqs per line?  (currently: yes, no check is made)

    # Put the row separator "\\" after every newline ...
    $expr =~ s/\n/\n\\\\/g;
    # ... then remove the one that ended up after the last newline.
    $expr =~ s/(.*)\n\\\\/$1\n/s;

    return "\\begin{align*}\n" . $expr . "\\end{align*}";
}



# Like autoMathModeEqnArray(), but for blocks whose first line carries no
# relation: one line without any "=" followed by one or more relation lines.
#
# A relation line has the form "<lhs><op> <rhs>\n" (the space before <op> is
# optional here), where <op> is one of =, \geq, \leq, <, >, \propto, \approx
# and neither side contains "=" or a newline.  The block must start at the
# beginning of the text or after an empty line and be followed by a newline
# or the end of the text; it is replaced by
# "\n\n" . eqnArrayReplaceFn(block) . "\n".
#
# Argument: the document text.   Returns: the rewritten text.
#
# NOTE: this feature may require you to escape some = signs
sub autoMathModeEqnArrayNoEqualityInFirstEquation {
    my ($file) = @_;

    # Lexical on purpose: these names are reused by other subs in this file
    # and must not leak into package globals.
    my $notEqualsRE = '[^\n=]*';
    # The trailing (?<!\\=) rejects an operator that is an escaped "\=".
    # Without it the optional space lets "a \= b" count as a relation line,
    # which defeats the escape and leaves "\=" inside the align* block.
    my $lineWithOneEqualsRE = "(?:($notEqualsRE) ?(?:=|\\\\geq|\\\\leq|<|>|\\\\propto|\\\\approx)(?<!\\\\=) ($notEqualsRE)\n)";
    my $lineWithoutOneEqualsRE = "(?:$notEqualsRE\n)";

    $file =~ s/(?:^|\n\n)($lineWithoutOneEqualsRE$lineWithOneEqualsRE+)(?:\n|$)/"\n\n".eqnArrayReplaceFn($1)."\n"/eg;

    return $file;
}













# Replace every escaped equals sign "\=" in the text with a plain "=".
# (The escape exists so that authors can keep a line from being picked up by
# the align* conversion passes.)
#
# Argument: the document text.   Returns: the rewritten text.
sub autoMathModeUnEscapeEqualsSigns {
    my ($file) = @_;

    $file =~ s/\\=/=/g;

    return $file;
}
 
# Replace the first escaped equals sign "\=" in the argument with "=".
# Not called anywhere in this file.
#
# Argument: a piece of text.   Returns: the text with the first "\=" unescaped.
sub unescapeEquals {
    # lexical copy, so the package global $expr is left alone
    my ($expr) = @_;

    $expr =~ s/\\=/=/;

    return $expr;
}



# Move parentheses that directly surround a stored-math placeholder into the
# stored text: "(" followed (after optional whitespace) by a placeholder is
# handed to processAdjParensLeft(), and a placeholder followed (after
# optional whitespace) by ")" is handed to processAdjParensRight().
#
# Argument: the document text.   Returns: the rewritten text.
sub includeAdjacentParens {
    my $text = shift;

    # opening parenthesis in front of a placeholder
    $text =~ s/(\(\s* ?${EASYLATEXSTORE_RESERVED_WORD}_\d+_${EASYLATEXSTORE_RESERVED_WORD} ?)/processAdjParensLeft($1)/ge;

    # closing parenthesis behind a placeholder
    $text =~ s/( ?${EASYLATEXSTORE_RESERVED_WORD}_\d+_${EASYLATEXSTORE_RESERVED_WORD} ?\s*\))/processAdjParensRight($1)/ge;

    return $text;
}

# Absorb an opening parenthesis into the stored text of the placeholder that
# follows it.
#
# Argument: a fragment that starts with "(" and contains a placeholder.
# Returns:  the fragment without its first character; as a side effect "(" is
#           prepended to the %Stored entry the placeholder refers to.
#           A fragment without a placeholder is returned unchanged and
#           %Stored is left alone.
sub processAdjParensLeft {
    my ($block) = @_;

    # Take the id from this match only; reading $1 after an unchecked match
    # could pick up a stale capture from an earlier one.
    my ($storageID) =
        $block =~ /${EASYLATEXSTORE_RESERVED_WORD}_(\d+)_${EASYLATEXSTORE_RESERVED_WORD}/;
    return $block unless defined $storageID;

    $Stored{$storageID} = '(' . $Stored{$storageID};

    # drop the "(" that now lives in the stored text
    return substr($block, 1);
}

# Absorb a closing parenthesis into the stored text of the placeholder that
# precedes it.
#
# Argument: a fragment that contains a placeholder and ends with ")".
# Returns:  the fragment without its last character; as a side effect ")" is
#           appended to the %Stored entry the placeholder refers to.
#           A fragment without a placeholder is returned unchanged and
#           %Stored is left alone.
sub processAdjParensRight {
    my ($block) = @_;

    # Take the id from this match only; reading $1 after an unchecked match
    # could pick up a stale capture from an earlier one.
    my ($storageID) =
        $block =~ /${EASYLATEXSTORE_RESERVED_WORD}_(\d+)_${EASYLATEXSTORE_RESERVED_WORD}/;
    return $block unless defined $storageID;

    $Stored{$storageID} = $Stored{$storageID} . ')';

    # drop the ")" that now lives in the stored text
    return substr($block, 0, -1);
}

######################
# "store" subroutines
######################

#currently unused; TODO
# Replace the (non-empty, brace-free) argument of every \documentclass{...}
# with a placeholder obtained from store(), keeping the command itself.
#
# Argument: the document text.   Returns: the rewritten text.
sub storeOther {
    my $text = shift;

    $text =~ s/\\documentclass{([^}]+)}/'\\documentclass{' . store($1) . '}'/egs;

    return $text;
}

# Replace everything that must not be touched by the auto-math passes with
# placeholders obtained from store().  In this order:
#   1. \begin{ENV} ... \end{ENV} for align, align*, eqnarray, eqnarray*,
#      equation, equation* and verbatim (shortest match, may span lines),
#   2. \documentclass{...},
#   3. inline math delimited by unescaped "$" signs (may span lines).
#
# Argument: the document text.
# Returns:  the text with those spans replaced by placeholders.
sub storeMathModes {
    my ($file) = @_;

    # \Q...\E escapes the "*" of the starred environment names
    for my $env (qw(align align* eqnarray eqnarray* equation equation* verbatim)) {
        $file =~ s/(\\begin\{\Q$env\E\}.*?\\end\{\Q$env\E\})/store($1)/egs;
    }

    # The whole command is stored; a further substitution on the argument of
    # \documentclass{...} could never match after this one and was removed.
    $file =~ s/(\\documentclass{.*?})/store($1)/egs;

    $file =~ s/((?<!\\)\$.*?(?<!\\)\$)/store($1)/egs;

    return $file;
}

# Put stored text back: every placeholder, together with one optional space
# on either side of it, is replaced by its %Stored entry.  The substitution
# is repeated until no placeholder is left, so stored text that itself
# contains placeholders is expanded as well.
#
# Argument: text containing placeholders.   Returns: the expanded text.
sub unstoreMathModes {
    my $text = shift;

    1 while $text =~ s/ ?${EASYLATEXSTORE_RESERVED_WORD}_(\d+)_${EASYLATEXSTORE_RESERVED_WORD} ?/$Stored{$1}/egs;

    return $text;
}



# Remember a piece of text under the current $StorageIndex and return the
# placeholder that stands for it: the reserved word, "_<id>_", the reserved
# word again, with one space on each side.  $StorageIndex is advanced by one.
# (got this idea from UseMod)
#
# Argument: the text to protect.   Returns: the placeholder string.
sub store {
    my $payload = shift;

    $Stored{$StorageIndex} = $payload;
    my $id = $StorageIndex++;

    return " ${EASYLATEXSTORE_RESERVED_WORD}_${id}_${EASYLATEXSTORE_RESERVED_WORD} ";
}


######################
# utility subroutines
######################


# Read the math-mode symbol list, one symbol per line, from the file named by
# $MATH_MODE_SYMBOL_FILEPATH.  Every symbol is appended to both
# @MATH_MODE_SYMBOLS and @MATH_MODE_BINARY_SYMBOLS; the binary list then also
# receives the operators + - * / and =.
#
# Takes no arguments and returns nothing useful.
# Dies when the file cannot be opened.
#
# Blank lines are skipped and a trailing carriage return is removed: an empty
# symbol would become an empty regex alternative in the passes that use these
# lists, and a symbol ending in "\r" could never match.
sub loadMathModeSymbols {

#I got the symbol tables by doing this on my own machine:
#    cd /usr/share/texmf/tex/latex
#    grep -r DeclareMathSymbol * > /tmp/t
#    perl -pi -e 's/.*?:\s*\\DeclareMathSymbol{?(\\\w+)}?(\s|{).*/$1/g' /tmp/t
#    perl -pi -e 's/.*?:\s*(\\\w+)(\s|$)/$1/gm' /tmp/t
#    perl -i -e 'while (<>) {if (!/:/) {print $_;}}' /tmp/t
#
#    cd {src directory of "The not so Short Introduction to LaTeX"}
#    cat lssym.tex | perl -e 'undef $/; $_ = <>; while (/(?:{(\\\w+)}|\\verb\|(\\\w+)\|)/g) {print "$1$2\n";}' lssym.tex  >> /tmp/t
#
#    sort /tmp/t > /tmp/t2
#    uniq /tmp/t2 > /tmp/t
#    grep -v '\\DeclareMathSymbol' /tmp/t > /tmp/t2 
#
#and then I moved /tmp/t2 into the file math_mode_symbol_list.txt
#
# and then manually added "\ldots" at the end of the file
#

    # Read line by line inside this sub only; the caller's record separator
    # is restored on return.
    local $/ = "\n";

    # Three-argument open: the path is never interpreted as a mode or command.
    open(my $symbol_fh, '<', $MATH_MODE_SYMBOL_FILEPATH)
        or die "Can't open list of math mode symbols at $MATH_MODE_SYMBOL_FILEPATH: $!";
    while (my $line = <$symbol_fh>)
    {
        chomp $line;
        $line =~ s/\r\z//;
        next unless length $line;
        push(@MATH_MODE_SYMBOLS, $line);
        push(@MATH_MODE_BINARY_SYMBOLS, $line);
    }
    close($symbol_fh);

    push(@MATH_MODE_BINARY_SYMBOLS, '+', '-', '*', '/', '=');

    return;
}

