%{
/*
 * Copyright (C) 2002  Lorenzo Bettini <bettini@gnu.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

/*
  This scanner for Perl was written by
  Alain Barbet <alian@alianwebserver.com>
*/

/* Running count of line breaks consumed by the scanner.  It is incremented
   by the newline rules in the rules section and is not read anywhere else
   in this file. */
static int lineno = 1 ; /* number of scanned lines */

/* NOTE(review): presumably declares the generate*, start* and end* output
   helpers called from the rule actions -- confirm against genfun.h. */
#include "genfun.h"

#include <string>

using std::string;

/* Splits a slash-delimited match into an optional leading word, the
   delimited body and an optional trailing word; defined after the rules
   section. */
static void handle_sed_expression(const string &exp);

%}
/* Generated scanner symbols use the perl_scanner_ prefix instead of yy. */
%option prefix="perl_scanner_"
/* No yywrap() has to be supplied; scanning stops at the end of the input. */
%option noyywrap


/* Small helper patterns.  None of nl, cr, STRING or not_alpha is referenced
   by a rule in this file. */
nl \n
cr \r
STRING \"[^\"\n]*\"
not_alpha [^a-zA-Z0-9]

/* Start conditions.  All four are inclusive (%s), so rules written without
   a start condition remain active while one of them is selected. */
%s COMMENT_STATE
%s SINGLELINE_COMMENT
%s STRING_STATE
%s CHAR_STATE

/* Identifier: a letter or underscore followed by letters, digits or
   underscores. */
IDE [a-zA-Z_]([a-zA-Z0-9_])*
/* One whitespace character, line breaks included. */
wspace [ \t\n\r]
/* Angle-bracketed run of identifiers and whitespace; only used as an
   optional part of funccall. */
templspec \<({wspace}|{IDE})*\>
/* POD command names that may open a documentation block when they follow
   an equals sign at the start of a line.  The list follows the command
   paragraphs described in perlpod; cut is deliberately absent because it
   closes a block and has its own rule. */
pod (pod|head[1-6]|over|item|back|begin|end|for|encoding)

/* Words highlighted as keywords, kept in ten groups that are joined into
   the single keyword pattern at the end.  Some words occur in more than one
   group (for example hex, oct, pack and length); a repeated alternative
   does not change what the joined pattern matches. */
keyword1 (chomp|chop|chr|crypt|hex|index|lc|lcfirst|length|oct|ord|pack|q|qq|reverse|rindex|sprintf|substr|tr|uc|ucfirst|m|s|g|qw)

keyword2 (abs|atan2|cos|exp|hex|int|log|oct|rand|sin|sqrt|srand|my|local|our)
keyword3 (delete|each|exists|keys|values|pack|read|syscall|sysread|syswrite|unpack|vec)

keyword4 (undef|unless|return|length|grep|sort|caller|continue|dump|eval|exit|goto|last|next|redo|sub|wantarray)
keyword5 (pop|push|shift|splice|unshift|split|switch|join|defined|foreach|last)
keyword6 (chop|chomp|bless|dbmclose|dbmopen|ref|tie|tied|untie|while|next|map)
keyword7 (eq|die|cmp|lc|uc|and|do|if|else|elsif|for|use|require|package|import)

keyword8 (chdir|chmod|chown|chroot|fcntl|glob|ioctl|link|lstat|mkdir|open|opendir|readlink|rename|rmdir|stat|symlink|umask|unlink|utime)

keyword9 (binmode|close|closedir|dbmclose|dbmopen|die|eof|fileno|flock|format|getc|print|printf|read|readdir|rewinddir|seek|seekdir|select|syscall|sysread|sysseek|syswrite|tell|telldir|truncate|warn|write)

keyword10 (alarm|exec|fork|getpgrp|getppid|getpriority|kill|pipe|qx|setpgrp|setpriority|sleep|system|times|wait|waitpid)


/* Union of all keyword groups; this is the name the rules refer to. */
keyword ({keyword1}|{keyword2}|{keyword3}|{keyword4}|{keyword5}|{keyword6}|{keyword7}|{keyword8}|{keyword9}|{keyword10})

/* One operator or punctuation character. */
symbol ([\~\!\%\^\*\(\)\-\+\=\[\]\|\\\:\;\,\.\/\?\&\<\>])
/* Curly braces.  The rules section spells this class out literally instead
   of referring to this name. */
cbracket [\{\}]

/* An identifier used as a call.  The part after the slash is trailing
   context: optional whitespace, an optional templspec group, optional
   whitespace and an opening parenthesis must follow, but only the
   identifier ends up in yytext. */
funccall {IDE}/{wspace}*({templspec}){0,1}{wspace}*\(

%%

<INITIAL>"#" {
   /* A hash sign in ordinary code selects SINGLELINE_COMMENT and is
      passed to startComment. */
   BEGIN SINGLELINE_COMMENT ; startComment( yytext ) ; }
<SINGLELINE_COMMENT>\r*\n {
   /* The line break, with any carriage returns in front of it, ends the
      comment. */
   BEGIN INITIAL ;
   /* yyless(0) hands the whole match back to the input so that it is
      scanned again in INITIAL; it also truncates yytext to length zero,
      so endComment below receives an empty string. */
   yyless (0); // put the \n back
   endComment( yytext ) ;
   /* if we encounter another # during a comment we simply
      treat it as an ordinary string */
 }
<INITIAL>^={pod} { BEGIN COMMENT_STATE ;
       /* An equals sign at the start of a line followed by a name from
          the pod pattern opens a documentation block, which is emitted
          as a comment. */
       startComment( yytext ) ;
     }
<COMMENT_STATE>\r*\n {
   /* Close the comment before the line break and open a new one after
      it, so the line break itself is generated outside any comment. */
   endComment ("");
   ++lineno;
   generateNewLine(yytext) ;
   startComment ("");
 }
<COMMENT_STATE>^"=cut" {
                      /* The cut command at the start of a line is passed
                         to endComment and scanning returns to INITIAL. */
                      endComment(yytext) ;
                      BEGIN INITIAL ; /* end of pod doc */ }


<INITIAL>\\\" { generateSymbol( yytext ); /* e.g. in a sed expression */ }
 /* An unescaped double quote in ordinary code opens a string. */
<INITIAL>\" { BEGIN STRING_STATE ; startString( yytext );  }
 /* Inside the string, a doubled backslash and a backslash followed by a */
 /* double quote are each consumed as a pair, so the closing rule below */
 /* only fires on an unescaped double quote. */
<STRING_STATE>\\\\ {  generate_preproc( yytext ) ; }
<STRING_STATE>"\\\"" {  generate_preproc( yytext ) ; }
<STRING_STATE>\" { BEGIN INITIAL ; endString( yytext ) ; }

<INITIAL>\' { BEGIN CHAR_STATE ; startString( yytext );  /* single quote opens a string */ }
 /* Inside the string, a doubled backslash and a backslash followed by a */
 /* single quote are each consumed as a pair, so the closing rule below */
 /* only fires on an unescaped single quote. */
<CHAR_STATE>\\\\ {  generate_preproc( yytext ) ; }
<CHAR_STATE>"\\\'" {  generate_preproc( yytext ) ; }
<CHAR_STATE>\' { BEGIN INITIAL ; endString( yytext ) ; }

<INITIAL>{keyword} { generateKeyWord( yytext ) ; }
<INITIAL>{symbol} { generateSymbol( yytext ); }
<INITIAL>[\{\}] { generateCBracket ( yytext ); }

 /* Same action as the plain keyword rule, but with an opening parenthesis */
 /* as trailing context.  It is listed before the funccall rule, so when */
 /* both match the same text the keyword action is the one chosen. */
<INITIAL>{keyword}/{wspace}*\( { generateKeyWord( yytext ) ; }
 /* Names introduced by a dollar-hash, dollar, at or percent prefix.  The */
 /* name part may be empty. */
<INITIAL>($#[0-9a-zA-Z_]*) { generateBaseType( yytext ) ; }
<INITIAL>($[0-9a-zA-Z_]*) { generateBaseType( yytext ) ; }
<INITIAL>(@[0-9a-zA-Z_]*) { generateBaseType( yytext ) ; }
<INITIAL>(%[0-9a-zA-Z_]*) { generateBaseType( yytext ) ; }
 /* An identifier followed by an opening parenthesis is a function call. */
<INITIAL>{funccall} { generateFunction ( yytext ); }

<INITIAL>0[xX][0-9a-fA-F]* { generateNumber( yytext ) ; /* hexadecimal literal; the digits after the x may be absent */ }
 /* Decimal literal: digits, optionally followed by a dot, more digits, */
 /* an optional exponent letter, an optional sign and exponent digits. */
<INITIAL>[0-9][0-9]*(\.[0-9]*[eE]?[-+]?[0-9]*)? { generateNumber( yytext ) ; }

 /* Angle-bracketed text containing no double quote, newline or space is */
 /* emitted as a string.  NOTE(review): presumably meant for filehandle */
 /* reads -- confirm. */
<INITIAL>\<[^\"\n ]*\> { generateString (yytext); }

 /* Any identifier not claimed by an earlier rule is plain text. */
<INITIAL>[a-zA-Z_]([a-zA-Z0-9_])* { generate_normal( yytext ) ; }

 /* Slash-delimited expression on a single line, with optional word */
 /* characters before the first slash and after the last one. */
 /* handle_sed_expression splits the match into those parts. */
<INITIAL>([a-zA-Z0-9_])*"/".*"/"?.*"/"([a-zA-Z0-9_])* {
        handle_sed_expression(yytext);
}

\t {
        /* These rules carry no start condition, so they apply in INITIAL
           and in every inclusive start condition declared above. */
        generateTab() ;
}

\r+ {
        /* A run of carriage returns is collapsed: only the last one is
           generated.  The pattern requires at least one carriage return
           so that yyleng-1 is always a valid index into yytext. */
        generate_preproc(&yytext[yyleng-1]);
        // skip the previous ones, only generate one
}

. { generate_preproc( yytext ) ; /* anything else */ }

\n {
       /* Line break not consumed by a more specific rule. */
       ++lineno;
       generateNewLine() ;
}

%%

void
handle_sed_expression(const string &e)
{
  string exp = e;

  string::size_type slash = exp.find_first_of("/");
  if (slash) {
    generateKeyWord(exp.substr(0, slash).c_str());
    exp = exp.substr(slash);
  }

  string::size_type last_slash = exp.find_last_of("/");

  generate_normal(exp.substr(0, last_slash+1).c_str());

  if (last_slash < exp.size()) {
    generateKeyWord(exp.substr(last_slash+1).c_str());
  }
}

/* vim:set ft=flex expandtab cindent tabstop=4 softtabstop=4 shiftwidth=4 textwidth=0: */
