# # Module Parse::Eyapp::Grammar # # Copyright © 2006, 2007, 2008, 2009, 2010, 2011, 2012 Casiano Rodriguez-Leon. # Copyright © 2017 William N. Braswell, Jr. # All Rights Reserved. # # Parse::Yapp is Copyright © 1998, 1999, 2000, 2001, Francois Desarmenien. # Parse::Yapp is Copyright © 2017 William N. Braswell, Jr. # All Rights Reserved. package Parse::Eyapp::Grammar; @ISA=qw( Parse::Eyapp::Options ); require 5.004; use Carp; use strict; use Parse::Eyapp::Options; use Parse::Eyapp::Parse; use Scalar::Util qw{reftype}; use Data::Dumper; ############### # Constructor # ############### sub new { my($class)=shift; my($values); my($self)=$class->SUPER::new(@_); my($parser)=new Parse::Eyapp::Parse; defined($self->Option('input')) or croak "No input grammar"; $values = $parser->Parse($self->Option('input'), # 1 input $self->Option('firstline'), # 2 Line where the grammar source starts $self->Option('inputfile'), # 3 The file or program containing the grammar $self->Option('tree'), # 4 %tree activated $self->Option('nocompact'), # 5 %nocompact $self->Option('lexerisdefined'), # 6 lexer is defined $self->Option('prefix'), # 7 accept prefix $self->Option('start'), # 8 specify start symbol #$self->Option('prefixname'), # yyprefix #$self->Option('buildingtree') # If building AST ); undef($parser); $$self{GRAMMAR}=_ReduceGrammar($values); ref($class) and $class=ref($class); bless($self, $class); my $ns = $self->{GRAMMAR}{NAMINGSCHEME} ; if ($ns && reftype($ns) eq 'ARRAY') { $ns = eval "sub { $ns->[0]; }; "; warn "Error in \%namingscheme directive $@" if $@; $ns = $ns->($self); } $ns ||= \&give_default_name; $self->{GRAMMAR}{NAMINGSCHEME} = $ns; # added to allow programmable production naming schemes (%name) $self; } ########### # Methods # ########### ########################## # Method To View Grammar # ########################## sub ShowRules { my($self)=shift; my($rules)=$$self{GRAMMAR}{RULES}; my($ruleno)=-1; my($text); for (@$rules) { my($lhs,$rhs)=@$_; $text.=++$ruleno.":\t".$lhs." -> "; if(@$rhs) { $text.=join(' ',map { $_ eq chr(0) ? '$end' : $_ } @$rhs); } else { $text.="/* empty */"; } $text.="\n"; } $text; } sub give_default_name { my ($self, $index, $lhs) = @_; my $name = "$lhs"."_$index"; return $name; } sub give_lhs_name { my ($self, $index, $lhs, $rhs) = @_; my $name = $lhs; return $name; } sub give_token_name { my ($self, $index, $lhs, $rhs) = @_; my @rhs = @$rhs; $rhs = ''; unless (@rhs) { # Empty RHS return $lhs.'_is_empty'; } my $names = $self->{GRAMMAR}{TOKENNAMES} || {}; for (@rhs) { if ($self->is_token($_)) { s/^'(.*)'$/$1/; my $name = $names->{$_} || ''; unless ($name) { $name = $_ if /^\w+$/; } $rhs .= "_$name" if $name; } } unless ($rhs) { # no 'word' tokens in the RHS for (@rhs) { $rhs .= "_$_" if /^\w+$/; } } # check if another production with such name exists? my $name = $lhs.'_is'.$rhs; return $name; } sub camelize { my $s = shift; my @a = split(/(?<=[A-Za-z])_(?=[A-Za-z])|\b/, $s); my $a = shift @a; @a = map { ucfirst $_ } @a; join('', ($a, @a)); } sub give_rhs_name { my ($self, $index, $lhs, $rhs) = @_; my @rhs = @$rhs; $rhs = ''; unless (@rhs) { # Empty RHS return camelize($lhs).'_is_empty'; } my $names = $self->{GRAMMAR}{TOKENNAMES} || {}; for (@rhs) { if ($self->is_token($_)) { # remove apostrophes s/^'(.*)'$/$1/; # explicit name given ? my $name = $names->{$_} || ''; # no name was given, use symbol if is an ID unless ($name) { $name = $_ if /^\w+$/; } $rhs .= "_$name" if $name; } else { # syntactic variable next if exists $self->{GRAMMAR}{CONFLICTHANDLERS}{$_}; $rhs .= '_'.camelize($_) if /^\w*$/; } } # check if another production with such name exists? my $name = camelize($lhs).'_is'.$rhs; return $name; } sub classname { my ($self, $name, $index, $lhs, $rhs) = @_; $name = $name->[0]; unless (defined($name)) { if ($lhs =~ /\$start/) { $name = "_SUPERSTART" } elsif ($lhs =~ /\@(\d+)-(\d+)/) { $name = "_CODE" } elsif ($lhs =~ /PAREN-(\d+)/) { $name = "_PAREN" } elsif ($lhs =~ /STAR-(\d+)/) { $name = "_STAR_LIST" } elsif ($lhs =~ /PLUS-(\d+)/) { $name = "_PLUS_LIST" } elsif ($lhs =~ /OPTIONAL-(\d+)/) { $name = "_OPTIONAL" } } my $naming_scheme = $self->{GRAMMAR}{NAMINGSCHEME}; if (!$name) { $name = $naming_scheme->($self, $index, $lhs, $rhs); } elsif ($name =~ /^:/) { # it is a label only $name = $naming_scheme->($self, $index, $lhs, $rhs).$name; } return $name; } # Added by Casiano ##################################### # Method To Return the Grammar Rules# ##################################### sub Rules { # TODO: find proper names my($self)=shift; my($rules)=$$self{GRAMMAR}{RULES}; my($text) = "[#[productionNameAndLabel => lhs, [ rhs], bypass]]\n"; my $packages = q{'TERMINAL', '_OPTIONAL', '_STAR_LIST', '_PLUS_LIST', }; my $index = 0; my $label = "{\n"; # To huild a reverse map label => production number for (@$rules) { my($lhs,$rhs,$prec,$name)=@$_; my $bypass = $name->[2]; $bypass = $self->Bypass unless defined($bypass); $label .= " '$1' => $index,\n" if defined($name->[0]) and $name->[0] =~ /(:.*)/; # find an acceptable perl identifier as name $name = $self->classname($name, $index, $lhs, $rhs); $label .= " '$name' => $index,\n"; $packages .= "\n".(" "x9)."'$name', "; $text.= " [ '$name' => '$lhs', [ "; $text.=join(', ',map { $_ eq chr(0) ? "'\$end'" : $_ =~ m{^'} ? $_ : "'$_'" } @$rhs); $text.=" ], $bypass ],\n"; $index++; } $text .= ']'; $label .= '}'; return ($text, $packages, $label); } # Added by Casiano ##################################### # Method To Return the Grammar Terms# ##################################### sub Terms { my($self)=shift; my(@terms)= sort(keys(%{$$self{GRAMMAR}{TERM}})); my %semantic = %{$self->{GRAMMAR}{SEMANTIC}}; my $text = "{ "; $text .= join(",\n\t", # Warning! bug. Before: map { $_ eq chr(0) ? "'\$end' => 0" : "$_ => $semantic{$_}"} @terms); map { $_ eq chr(0) ? "'' => { ISSEMANTIC => 0 }" : "$_ => { ISSEMANTIC => $semantic{$_} }"} @terms); $text .= ",\n\terror => { ISSEMANTIC => 0 },\n}"; } sub conflictHandlers { my $self = shift; my $t = Dumper $self->{GRAMMAR}{CONFLICTHANDLERS}; $t =~ s/^\$VAR\d*\s*=\s*//; $t =~s/;$//; $t =~s/\\'//g; # quotes inside quotes $t; } # produces the text mapping states to conflicthandlers sub stateConflict { my $self = shift; my $c = $self->{GRAMMAR}{CONFLICTHANDLERS}; my %stateConflict; my %t = (); for my $cn (keys %$c) { my $ce = $c->{$cn}; my $codeh = $ce->{codeh}; $codeh = "sub { $codeh }"; my @s = defined($ce->{states}) ? @{$ce->{states}} : (); for my $s (@s) { my ($sn) = keys %$s; #my ($tokens) = values %$s; #$tokens = join ',', @$tokens; $t{$sn} = '' unless defined($t{$sn}); $t{$sn} .= << "NEWSTATECONFLICTENTRY"; { name => '$cn', codeh => $codeh, }, NEWSTATECONFLICTENTRY } #for states } #for conflict names my $t = '{ '; for my $s (keys %t) { $t .= "$s => [ $t{$s} ],"; } $t .= ' }'; } ##################################### # Method To Return the Bypass Option# ##################################### sub Bypass { my($self)=shift; return $$self{GRAMMAR}{BYPASS} } ##################################### # Method To Return the Prefix Option# ##################################### sub Prefix { my($self)=shift; return $$self{GRAMMAR}{PREFIX} } sub Buildingtree { my($self)=shift; return $$self{GRAMMAR}{BUILDINGTREE} } sub Prompt { my $self = shift; return "our \$PROMPT = $$self{GRAMMAR}{INCREMENTAL};\n" if defined($$self{GRAMMAR}{INCREMENTAL}); return ''; } sub is_token { my($self)=shift; exists($self->{GRAMMAR}{TERM}{$_[0]}) } ##################################### # Method To Return the ACCESSORS ##################################### sub Accessors { my($self)=shift; return $$self{GRAMMAR}{ACCESSORS} } ########################### # Method To View Warnings # ########################### sub Warnings { my($self)=shift; return '' if $self->Option('start'); my($text) = ''; my($grammar)=$$self{GRAMMAR}; exists($$grammar{UUTERM}) and do { $text="Unused terminals:\n\n"; for (@{$$grammar{UUTERM}}) { $text.="\t$$_[0], declared line $$_[1]\n"; } $text.="\n"; }; exists($$grammar{UUNTERM}) and do { $text.="Useless non-terminals:\n\n"; for (@{$$grammar{UUNTERM}}) { $text.="\t$$_[0], declared line $$_[1]\n"; } $text.="\n"; }; exists($$grammar{UURULES}) and do { $text.="Useless rules:\n\n"; for (@{$$grammar{UURULES}}) { $text.="\t$$_[0] -> ".join(' ',@{$$_[1]})."\n"; } $text.="\n"; }; $text; } ###################################### # Method to get summary about parser # ###################################### sub Summary { my($self)=shift; my($text); $text ="Number of rules : ". scalar(@{$$self{GRAMMAR}{RULES}})."\n"; $text.="Number of terminals : ". scalar(keys(%{$$self{GRAMMAR}{TERM}}))."\n"; $text.="Number of non-terminals : ". scalar(keys(%{$$self{GRAMMAR}{NTERM}}))."\n"; $text; } ############################### # Method to Ouput rules table # ############################### sub RulesTable { my($self)=shift; my($inputfile)=$self->Option('inputfile'); my($linenums)=$self->Option('linenumbers'); my($rules)=$$self{GRAMMAR}{RULES}; my $ruleno = 0; my($text); defined($inputfile) or $inputfile = 'unknown'; $text="[\n\t"; $text.=join(",\n\t", map { my($lhs,$rhs,$rname,$code)=@$_[0,1,3,4]; my($len)=scalar(@$rhs); my($text); $rname = $self->classname($rname, $ruleno, $lhs, $rhs); $ruleno++; $text.="[#Rule $rname\n\t\t '$lhs', $len,"; if($code) { $text.= "\nsub {". ( $linenums ? qq(\n#line $$code[1] "$inputfile"\n) : " "). "$$code[0]}"; } else { $text.=' undef'; } $text.="\n$Parse::Eyapp::Output::pattern\n\t]"; $text; } @$rules); $text.="\n]"; $text; } ################################ # Methods to get HEAD and TAIL # ################################ sub Head { my($self)=shift; my($inputfile)=$self->Option('inputfile'); my($linenums)=$self->Option('linenumbers'); my($text); $$self{GRAMMAR}{HEAD}[0] or return ''; defined($inputfile) or $inputfile = 'unkown'; for (@{$$self{GRAMMAR}{HEAD}}) { $linenums and $text.=qq(#line $$_[1] "$inputfile"\n); $text.=$$_[0]; } $text } sub Tail { my($self)=shift; my($inputfile)=$self->Option('inputfile'); my($linenums)=$self->Option('linenumbers'); my($text); ((reftype $$self{GRAMMAR}{TAIL} eq 'ARRAY') and $$self{GRAMMAR}{TAIL}[0]) or return ''; defined($inputfile) or $inputfile = 'unkown'; $linenums and $text=qq(#line $$self{GRAMMAR}{TAIL}[1] "$inputfile"\n); $text.=$$self{GRAMMAR}{TAIL}[0]; $text } ################# # Private Stuff # ################# sub _UsefulRules { my($rules,$nterm) = @_; my($ufrules,$ufnterm); my($done); $ufrules=pack('b'.@$rules); $ufnterm={}; vec($ufrules,0,1)=1; #start rules IS always useful RULE: for (1..$#$rules) { # Ignore start rule for my $sym (@{$$rules[$_][1]}) { exists($$nterm{$sym}) and next RULE; } vec($ufrules,$_,1)=1; ++$$ufnterm{$$rules[$_][0]}; } do { $done=1; RULE: for (grep { vec($ufrules,$_,1) == 0 } 1..$#$rules) { for my $sym (@{$$rules[$_][1]}) { exists($$nterm{$sym}) and not exists($$ufnterm{$sym}) and next RULE; } vec($ufrules,$_,1)=1; exists($$ufnterm{$$rules[$_][0]}) or do { $done=0; ++$$ufnterm{$$rules[$_][0]}; }; } }until($done); ($ufrules,$ufnterm) }#_UsefulRules sub _Reachable { my($rules,$nterm,$term,$ufrules,$ufnterm)=@_; my($reachable); my(@fifo)=( 0 ); $reachable={ '$start' => 1 }; #$start is always reachable while(@fifo) { my($ruleno)=shift(@fifo); for my $sym (@{$$rules[$ruleno][1]}) { exists($$term{$sym}) and do { ++$$reachable{$sym}; next; }; ( not exists($$ufnterm{$sym}) or exists($$reachable{$sym}) ) and next; ++$$reachable{$sym}; push(@fifo, grep { vec($ufrules,$_,1) } @{$$nterm{$sym}}); } } $reachable }#_Reachable sub _SetNullable { my($rules,$term,$nullable) = @_; my(@nrules); my($done); RULE: for (@$rules) { my($lhs,$rhs)=@$_; exists($$nullable{$lhs}) and next; for (@$rhs) { exists($$term{$_}) and next RULE; } push(@nrules,[$lhs,$rhs]); } do { $done=1; RULE: for (@nrules) { my($lhs,$rhs)=@$_; exists($$nullable{$lhs}) and next; for (@$rhs) { exists($$nullable{$_}) or next RULE; } $done=0; ++$$nullable{$lhs}; } }until($done); } sub _ReduceGrammar { my($values)=@_; my($ufrules,$ufnterm,$reachable); my($grammar)= bless { HEAD => $values->{HEAD}, TAIL => $values->{TAIL}, EXPECT => $values->{EXPECT}, # Casiano modifications SEMANTIC => $values->{SEMANTIC}, # added to simplify AST BYPASS => $values->{BYPASS}, # added to simplify AST BUILDINGTREE => $values->{BUILDINGTREE}, # influences the semantic of lists * + ? ACCESSORS => $values->{ACCESSORS}, # getter-setter for %tree and %metatree PREFIX => $values->{PREFIX}, # yyprefix NAMINGSCHEME => $values->{NAMINGSCHEME}, # added to allow programmable production naming schemes (%name) NOCOMPACT => $values->{NOCOMPACT}, # Do not compact action tables. No DEFAULT field for "STATES" CONFLICTHANDLERS => $values->{CONFLICTHANDLERS}, # list of conflict handlers TERMDEF => $values->{TERMDEF}, # token => associated regular expression (for lexical analyzer) WHITES => $values->{WHITES}, # string with the code to skip whites (for lexical analyzer) LEXERISDEFINED => $values->{LEXERISDEFINED}, # true if %lexer was used INCREMENTAL => $values->{INCREMENTAL}, # true if '%incremental lexer' was used MODULINO => $values->{MODULINO}, # hash perlpath => path, prompt => question STRICT => $values->{STRICT}, # true if %stric DUMMY => $values->{DUMMY}, # array ref TOKENNAMES => {}, # for naming schemes }, __PACKAGE__; my($rules,$nterm,$term) = @$values {'RULES', 'NTERM', 'TERM'}; ($ufrules,$ufnterm) = _UsefulRules($rules,$nterm); exists($$ufnterm{$values->{START}}) or die "*Fatal* Start symbol $values->{START} derives nothing, at eof\n"; $reachable = _Reachable($rules,$nterm,$term,$ufrules,$ufnterm); $$grammar{TERM}{chr(0)}=undef; for my $sym (keys %$term) { ( exists($$reachable{$sym}) or exists($values->{PREC}{$sym}) ) and do { $$grammar{TERM}{$sym} = defined($$term{$sym}[0]) ? $$term{$sym} : undef; next; }; push(@{$$grammar{UUTERM}},[ $sym, $values->{SYMS}{$sym} ]); } $$grammar{NTERM}{'$start'}=[]; for my $sym (keys %$nterm) { exists($$reachable{$sym}) and do { exists($values->{NULL}{$sym}) and ++$$grammar{NULLABLE}{$sym}; $$grammar{NTERM}{$sym}=[]; next; }; push(@{$$grammar{UUNTERM}},[ $sym, $values->{SYMS}{$sym} ]); } for my $ruleno (0..$#$rules) { vec($ufrules,$ruleno,1) and exists($$grammar{NTERM}{$$rules[$ruleno][0]}) and do { push(@{$$grammar{RULES}},$$rules[$ruleno]); push(@{$$grammar{NTERM}{$$rules[$ruleno][0]}},$#{$$grammar{RULES}}); next; }; push(@{$$grammar{UURULES}},[ @{$$rules[$ruleno]}[0,1] ]); } _SetNullable(@$grammar{'RULES', 'TERM', 'NULLABLE'}); $grammar; }#_ReduceGrammar sub tokennames { my $self = shift; my $grammar = $self->{GRAMMAR}; $grammar->{TOKENNAMES} = { (%{$grammar->{TOKENNAMES}}, @_) } if (@_); $grammar->{TOKENNAMES} } 1;