package Parse::Eyapp::Cleaner; use warnings; use strict; use Scalar::Util qw{reftype}; my $lexlevel = 0; # Used by the lexical analyzer. Controls in which section we are: my ( $input, # head (0), body(1) or tail (2) @lineno, # Used by the lexical analyzer. $lineno[0] is the line number for # the beginning of the token, $lineno[1] the end $nberr, # Number of errors up to now ); my $filename; my $bypass = 0; sub trim { $_[0] =~ s/\s+$//; $_[0] =~ s/^\s+//; } sub controller { my $ouput = ''; local $/= undef; $$input = <>; while () { my ($token, $attr) = _Lexer(); last unless $token; if ($token eq '.') { ($token, $attr) = _Lexer(); next; } next unless defined(reftype($attr)) && defined($attr->[0]); print $attr->[0] unless ($token =~ /(CODE)|\$/); } print "\n"; } { my $output = ''; sub _generate { $output .= sprintf "%s"x@_, @_; } sub ppcontroller { $input = shift; my %args = @_; my $skipcomments = $args{skipcomments}? '|COMMENT' : ''; my $depth = 0; my $delete_set = 'CODE|BLANKS|DEFAULTACTION|\n|\$'.$skipcomments; $delete_set = qr{$delete_set}; my $end_cr_set = qr{\n\s*$|^\s*$}; my $ouput = ''; my ($ptoken, $pattr) = ('', ['', -1]); while () { my ($token, $attr) = _Lexer(); last unless $token; next if $token eq '$'; next unless defined(reftype($attr)) && defined($attr->[0]); if ($token eq '.') { # attribute name ($token, $attr) = _Lexer(); } elsif ($token eq 'NAME') { trim($attr->[0]); my $g = ($depth == 0)? "\n " : " "; _generate $attr->[0].$g; } elsif ($token =~ /\b(VARIABLE)\b/) { my $g = ($output =~ $end_cr_set)? '': "\n"; $attr->[0] =~ s/\s*:\s*$/:/; # remove blanks before and after colon _generate $g.$attr->[0]."\n "; } elsif ($token =~ /\b(IDENT|LITERAL|NUMBER|REGEXP)\b/) { _generate $attr->[0]." "; } elsif ($token =~ /(PREC|STAR\b|PLUS|OPTION|[)(])/) { $depth++ if $token eq '('; $depth-- if $token eq ')'; _generate $attr->[0]." "; } elsif ($token =~ /\b(TOKEN|ASSOC|CONFLICT|SYNTACTIC|SEMANTIC|STRICT|START|EXPECT|NAMINGSCHEME|LEXER|UNION)\b/) { my $g = ($output =~ $end_cr_set)? '': "\n"; _generate $g.$attr->[0]." "; } elsif ($token =~ /TREE/) { _generate "\n".$attr->[0]."\n"; } elsif ($token eq ':') { _generate ":\n "; } elsif ($token eq '|') { $output =~ s/[ \t]*$//; my $g = ($output =~ $end_cr_set)? '': "\n"; _generate "$g | "; #_generate "\n ".$attr->[0]." "; } elsif ($token eq ';') { my $g; if ($output =~ m{[:|]\s*$}) { $g = "/* empty */\n"; } elsif ($output =~ $end_cr_set) { $g = ''; } else { $g = "\n"; } _generate "$g;\n"; } elsif ($token eq '%%') { my $g = ($output =~ $end_cr_set)? '': "\n"; _generate "$g\n%%\n\n"; } else { _generate $attr->[0] unless ($token =~ $delete_set); } ($ptoken, $pattr) = ($token, $attr); } _generate "\n"; $output =~ s/\s*\Z/\n/; return $output; } } # end closure sub slurp_perl_code { my($level,$from,$code); $from=pos($$input); $level=1; while($$input=~/([{}])/gc) { substr($$input,pos($$input)-1,1) eq '\\' #Quoted and next; $level += ($1 eq '{' ? 1 : -1) or last; } $level and _SyntaxError(2,"Unmatched { opened line $lineno[0]",-1); $code = substr($$input,$from,pos($$input)-$from-1); $lineno[1]+= $code=~tr/\n//; return [ $code, $lineno[0] ]; } sub _Lexer { #At EOF pos($$input) && (pos($$input) >= length($$input)) and return('',[ '', -1 ]); #In TAIL section $lexlevel > 1 and do { my($pos)=pos($$input); $lineno[0]=$lineno[1]; $lineno[1]=-1; pos($$input)=length($$input); }; #Skip blanks $lexlevel == 0 ? $$input=~m{\G((?: # Head section: \n separates declarations [\t\ ]+ # Any white space char but \n )+)}xsgc : $$input=~m{\G((?: \s+ # any white space char, including \n )+)}xsgc and do { my($blanks)=$1; #Maybe At EOF pos($$input) >= length($$input) and return('',[ $blanks, -1 ]); $lineno[1]+= $blanks=~tr/\n//; return ('BLANKS', [ $blanks, $lineno[0]]); }; $$input=~m{\G((?: \#[^\n]*\s* # Perl like comments | /\*.*?\*/\s* # C like comments )+)}xsgc and do { my($blanks)=$1; #Maybe At EOF pos($$input) >= length($$input) and return('',[ $blanks, -1 ]); $lineno[1]+= $blanks=~tr/\n//; trim($blanks); return ('COMMENT', [ $blanks, $lineno[0]]); }; $lineno[0]=$lineno[1]; $$input=~/\G([A-Za-z_][A-Za-z0-9_]*\s*:)/gc and return('VARIABLE',[ $1, $lineno[0] ]); $$input=~/\G([A-Za-z_][A-Za-z0-9_]*)/gc and return('IDENT',[ $1, $lineno[0] ]); $$input =~ m{\G( / # opening slash (?:[^/\\]| # an ordinary character \\\\| # escaped \ i.e. \\ \\/| # escaped slash i.e. \/ \\ # escape i.e. \ )*? # non greedy repetitions / # closing slash ) }xgc and return('REGEXP',[ $1, $lineno[0] ]); $$input=~/\G( ' # opening apostrophe (?:[^'\\]| # an ordinary character \\\\| # escaped \ i.e. \\ \\'| # escaped apostrophe i.e. \' \\ # escape i.e. \ )*? # non greedy repetitions ' # closing apostrophe )/gxc and do { my $string = $1; # The string 'error' is reserved for the special token 'error' $string eq "'error'" and do { _SyntaxError(0,"Literal 'error' ". "will be treated as error token",$lineno[0]); return('IDENT',[ 'error', $lineno[0] ]); }; my $lines = $string =~ tr/\n//; _SyntaxError(2, "Constant string $string contains newlines",$lineno[0]) if $lines; $lineno[1] += $lines; return('LITERAL',[ $string, $lineno[0] ]); }; # New section: body or tail $$input=~/\G(%%)/gc and do { ++$lexlevel; return($1, [ $1, $lineno[0] ]); }; $$input=~/\G%begin\s*{/gc and do { return ('BEGINCODE', &slurp_perl_code()); }; $$input=~/\G{/gc and do { &slurp_perl_code(); }; if($lexlevel == 0) {# In head section $$input=~/\G(%(left|right|nonassoc))/gc and return('ASSOC',[ $1, $lineno[0] ]); $$input=~/\G(%start)/gc and return('START',[ $1, $lineno[0] ]); $$input=~/\G(%expect)/gc and return('EXPECT',[ $1, $lineno[0] ]); $$input=~/\G(%namingscheme)/gc and return('NAMINGSCHEME',[ $1, $lineno[0] ]); $$input=~/\G%{/gc and do { my($code); $$input=~/\G(.*?)%}/sgc or _SyntaxError(2,"Unmatched %{ opened line $lineno[0]",-1); $code=$1; $lineno[1]+= $code=~tr/\n//; return('HEADCODE',[ $code, $lineno[0] ]); }; $$input=~/\G(%token)/gc and return('TOKEN',[ $1, $lineno[0] ]); $$input=~/\G(%conflict)/gc and return('CONFLICT',[ $1, $lineno[0] ]); $$input=~/\G(%strict)/gc and return('STRICT',[ $1, $lineno[0] ]); $$input=~/\G(%semantic\s+token)/gc and return('SEMANTIC',[ $1, $lineno[0] ]); $$input=~/\G(%syntactic\s+token)/gc and return('SYNTACTIC',[ $1, $lineno[0] ]); $$input=~/\G(%type)/gc and return('TYPE',[ $1, $lineno[0] ]); $$input=~/\G%prefix\s+([A-Za-z_][A-Za-z0-9_:]*::)/gc and return('PREFIX',[ $1, $lineno[0] ]); $$input=~/\G(%union)/gc and return('UNION',[ $1, $lineno[0] ]); $$input=~/\G(%lexer)/gc and return('LEXER',[ $1, $lineno[0] ]); $$input=~/\G(%defaultaction)/gc and return('DEFAULTACTION',[ $1, $lineno[0] ]); $$input=~/\G(%tree((?:\s+(?:bypass|alias)){0,2}))/gc and do { my $treeoptions = defined($2)? $2 : ''; return('TREE',[ $1, $lineno[0] ]) }; $$input=~/\G(%metatree)/gc and return('METATREE',[ $1, $lineno[0] ]); $$input=~/\G([0-9]+)/gc and return('NUMBER',[ $1, $lineno[0] ]); } else {# In rule section $$input=~/\G(%prec)/gc and return('PREC',[ $1, $lineno[0] ]); $$input=~/\G((<\s*%name\s*([A-Za-z_][A-Za-z0-9_]*)\s*)?\*\s*>)/gc and return('STAR',[ $1, $lineno[0] ]); $$input=~/\G((%name\s*([A-Za-z_][A-Za-z0-9_]*)\s*)?\*)/gc and return('STAR',[ $1, $lineno[0] ]); $$input=~/\G((<\s*%name\s*([A-Za-z_][A-Za-z0-9_]*)\s*)?\+\s*>)/gc and return('PLUS',[ $1, $lineno[0] ]); $$input=~/\G((%name\s*([A-Za-z_][A-Za-z0-9_]*)\s*)?\+)/gc and return('PLUS',[ $1, $lineno[0] ]); $$input=~/\G((<\s*%name\s*([A-Za-z_][A-Za-z0-9_]*)\s*)?\?\s*)>/gc and return('OPTION',[ $1, $lineno[0] ]); $$input=~/\G((%name\s*([A-Za-z_][A-Za-z0-9_]*)\s*)?\?)/gc and return('OPTION',[ $1, $lineno[0] ]); $$input=~/\G(%no\s+bypass\s+[A-Za-z_][A-Za-z0-9_]*\s*)/gc and do { return('NAME',[ $1, $lineno[0] ]); }; $$input=~/\G(%name\s+[\w:]*\n?)/gc and do { return('NAME',[ $1, $lineno[0] ]); }; } #Always return something $$input=~/\G(.)/sg or return ('', ['', -1]); $1 eq "\n" and ++$lineno[1]; ( $1 ,[ $1, $lineno[0] ]); } sub _SyntaxError { my($level,$message,$lineno)=@_; $message= "*". [ 'Warning', 'Error', 'Fatal' ]->[$level]. "* $message, at ". ($lineno < 0 ? "eof" : "line $lineno")." at file $filename\n"; $level > 1 and die $message; warn $message; $level > 0 and ++$nberr; $nberr == 20 and die "*Fatal* Too many errors detected.\n" } 1;