aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/tools/ragel6/parsetree.cpp
diff options
context:
space:
mode:
authorsmalov <smalov@yandex-team.ru>2022-02-10 16:47:36 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:36 +0300
commitcfadda92ca195da3ad68d721a58872a4f1ced696 (patch)
treec0748b5dcbade83af788c0abfa89c0383d6b779c /contrib/tools/ragel6/parsetree.cpp
parentf70d9720e13aef3a935e3f405b0eac554529e76e (diff)
downloadydb-cfadda92ca195da3ad68d721a58872a4f1ced696.tar.gz
Restoring authorship annotation for <smalov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/tools/ragel6/parsetree.cpp')
-rw-r--r--contrib/tools/ragel6/parsetree.cpp4268
1 files changed, 2134 insertions, 2134 deletions
diff --git a/contrib/tools/ragel6/parsetree.cpp b/contrib/tools/ragel6/parsetree.cpp
index 8d6a73722c..ff538aa28e 100644
--- a/contrib/tools/ragel6/parsetree.cpp
+++ b/contrib/tools/ragel6/parsetree.cpp
@@ -1,2139 +1,2139 @@
-/*
- * Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Ragel.
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
*
- * Ragel is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Ragel is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Ragel; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <iostream>
-#include <iomanip>
-#include <errno.h>
-#include <limits.h>
-#include <stdlib.h>
-
-/* Parsing. */
-#include "ragel.h"
-#include "rlparse.h"
-#include "parsetree.h"
-
-using namespace std;
-ostream &operator<<( ostream &out, const NameRef &nameRef );
-ostream &operator<<( ostream &out, const NameInst &nameInst );
-
-/* Convert the literal string which comes in from the scanner into an array of
- * characters with escapes and options interpreted. Also null terminates the
- * string. Though this null termination should not be relied on for
- * interpreting literals in the parser because the string may contain \0 */
-char *prepareLitString( const InputLoc &loc, const char *data, long length,
- long &resLen, bool &caseInsensitive )
-{
- char *resData = new char[length+1];
- caseInsensitive = false;
-
- const char *src = data + 1;
- const char *end = data + length - 1;
-
- while ( *end != '\'' && *end != '\"' ) {
- if ( *end == 'i' )
- caseInsensitive = true;
- else {
- error( loc ) << "literal string '" << *end <<
- "' option not supported" << endl;
- }
- end -= 1;
- }
-
- char *dest = resData;
- long len = 0;
- while ( src != end ) {
- if ( *src == '\\' ) {
- switch ( src[1] ) {
- case '0': dest[len++] = '\0'; break;
- case 'a': dest[len++] = '\a'; break;
- case 'b': dest[len++] = '\b'; break;
- case 't': dest[len++] = '\t'; break;
- case 'n': dest[len++] = '\n'; break;
- case 'v': dest[len++] = '\v'; break;
- case 'f': dest[len++] = '\f'; break;
- case 'r': dest[len++] = '\r'; break;
- case '\n': break;
- default: dest[len++] = src[1]; break;
- }
- src += 2;
- }
- else {
- dest[len++] = *src++;
- }
- }
-
- resLen = len;
- resData[resLen] = 0;
- return resData;
-}
-
-FsmAp *VarDef::walk( ParseData *pd )
-{
- /* We enter into a new name scope. */
- NameFrame nameFrame = pd->enterNameScope( true, 1 );
-
- /* Recurse on the expression. */
- FsmAp *rtnVal = machineDef->walk( pd );
-
- /* Do the tranfer of local error actions. */
- LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
- if ( localErrDictEl != 0 ) {
- for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
- rtnVal->transferErrorActions( state, localErrDictEl->value );
- }
-
- /* If the expression below is a join operation with multiple expressions
- * then it just had epsilon transisions resolved. If it is a join
- * with only a single expression then run the epsilon op now. */
- if ( machineDef->type == MachineDef::JoinType && machineDef->join->exprList.length() == 1 )
- rtnVal->epsilonOp();
-
- /* We can now unset entry points that are not longer used. */
- pd->unsetObsoleteEntries( rtnVal );
-
- /* If the name of the variable is referenced then add the entry point to
- * the graph. */
- if ( pd->curNameInst->numRefs > 0 )
- rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
-
- /* Pop the name scope. */
- pd->popNameScope( nameFrame );
- return rtnVal;
-}
-
-void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
-{
- /* The variable definition enters a new scope. */
- NameInst *prevNameInst = pd->curNameInst;
- pd->curNameInst = pd->addNameInst( loc, name, false );
-
- if ( machineDef->type == MachineDef::LongestMatchType )
- pd->curNameInst->isLongestMatch = true;
-
- /* Recurse. */
- machineDef->makeNameTree( pd );
-
- /* The name scope ends, pop the name instantiation. */
- pd->curNameInst = prevNameInst;
-}
-
-void VarDef::resolveNameRefs( ParseData *pd )
-{
- /* Entering into a new scope. */
- NameFrame nameFrame = pd->enterNameScope( true, 1 );
-
- /* Recurse. */
- machineDef->resolveNameRefs( pd );
-
- /* The name scope ends, pop the name instantiation. */
- pd->popNameScope( nameFrame );
-}
-
-InputLoc LongestMatchPart::getLoc()
-{
- return action != 0 ? action->loc : semiLoc;
-}
-
-/*
- * If there are any LMs then all of the following entry points must reset
- * tokstart:
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
*
- * 1. fentry(StateRef)
- * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
- * 3. targt of any transition that has an fcall (the return loc).
- * 4. start state of all longest match routines.
- */
-
-Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
- const char *name, InlineList *inlineList )
-{
- Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
- action->actionRefs.append( pd->curNameInst );
- pd->actionList.append( action );
- action->isLmAction = true;
- return action;
-}
-
-void LongestMatch::makeActions( ParseData *pd )
-{
- /* Make actions that set the action id. */
- for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
- /* For each part create actions for setting the match type. We need
- * to do this so that the actions will go into the actionIndex. */
- InlineList *inlineList = new InlineList;
- inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
- InlineItem::LmSetActId ) );
- char *actName = new char[50];
- sprintf( actName, "store%i", lmi->longestMatchId );
- lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
- }
-
- /* Make actions that execute the user action and restart on the last
- * character. */
- for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
- /* For each part create actions for setting the match type. We need
- * to do this so that the actions will go into the actionIndex. */
- InlineList *inlineList = new InlineList;
- inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
- InlineItem::LmOnLast ) );
- char *actName = new char[50];
- sprintf( actName, "last%i", lmi->longestMatchId );
- lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
- }
-
- /* Make actions that execute the user action and restart on the next
- * character. These actions will set tokend themselves (it is the current
- * char). */
- for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
- /* For each part create actions for setting the match type. We need
- * to do this so that the actions will go into the actionIndex. */
- InlineList *inlineList = new InlineList;
- inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
- InlineItem::LmOnNext ) );
- char *actName = new char[50];
- sprintf( actName, "next%i", lmi->longestMatchId );
- lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
- }
-
- /* Make actions that execute the user action and restart at tokend. These
- * actions execute some time after matching the last char. */
- for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
- /* For each part create actions for setting the match type. We need
- * to do this so that the actions will go into the actionIndex. */
- InlineList *inlineList = new InlineList;
- inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
- InlineItem::LmOnLagBehind ) );
- char *actName = new char[50];
- sprintf( actName, "lag%i", lmi->longestMatchId );
- lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
- }
-
- InputLoc loc;
- loc.line = 1;
- loc.col = 1;
- loc.fileName = "NONE";
-
- /* Create the error action. */
- InlineList *il6 = new InlineList;
- il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
- lmActSelect = newAction( pd, loc, "switch", il6 );
-}
-
-void LongestMatch::findName( ParseData *pd )
-{
- NameInst *nameInst = pd->curNameInst;
- while ( nameInst->name == 0 ) {
- nameInst = nameInst->parent;
- /* Since every machine must must have a name, we should always find a
- * name for the longest match. */
- assert( nameInst != 0 );
- }
- name = nameInst->name;
-}
-
-void LongestMatch::makeNameTree( ParseData *pd )
-{
- /* Create an anonymous scope for the longest match. Will be used for
- * restarting machine after matching a token. */
- NameInst *prevNameInst = pd->curNameInst;
- pd->curNameInst = pd->addNameInst( loc, 0, false );
-
- /* Recurse into all parts of the longest match operator. */
- for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
- lmi->join->makeNameTree( pd );
-
- /* Traverse the name tree upwards to find a name for this lm. */
- findName( pd );
-
- /* Also make the longest match's actions at this point. */
- makeActions( pd );
-
- /* The name scope ends, pop the name instantiation. */
- pd->curNameInst = prevNameInst;
-}
-
-void LongestMatch::resolveNameRefs( ParseData *pd )
-{
- /* The longest match gets its own name scope. */
- NameFrame nameFrame = pd->enterNameScope( true, 1 );
-
- /* Take an action reference for each longest match item and recurse. */
- for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
- /* Record the reference if the item has an action. */
- if ( lmi->action != 0 )
- lmi->action->actionRefs.append( pd->localNameScope );
-
- /* Recurse down the join. */
- lmi->join->resolveNameRefs( pd );
- }
-
- /* The name scope ends, pop the name instantiation. */
- pd->popNameScope( nameFrame );
-}
-
-void LongestMatch::restart( FsmAp *graph, TransAp *trans )
-{
- StateAp *fromState = trans->fromState;
- graph->detachTrans( fromState, trans->toState, trans );
- graph->attachTrans( fromState, graph->startState, trans );
-}
-
-void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
-{
- graph->markReachableFromHereStopFinal( graph->startState );
- for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
- if ( ms->stateBits & STB_ISMARKED ) {
- ms->lmItemSet.insert( 0 );
- ms->stateBits &= ~ STB_ISMARKED;
- }
- }
-
- /* Transfer the first item of non-empty lmAction tables to the item sets
- * of the states that follow. Exclude states that have no transitions out.
- * This must happen on a separate pass so that on each iteration of the
- * next pass we have the item set entries from all lmAction tables. */
- for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
- for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
- if ( trans->lmActionTable.length() > 0 ) {
- LmActionTableEl *lmAct = trans->lmActionTable.data;
- StateAp *toState = trans->toState;
- assert( toState );
-
- /* Can only optimize this if there are no transitions out.
- * Note there can be out transitions going nowhere with
- * actions and they too must inhibit this optimization. */
- if ( toState->outList.length() > 0 ) {
- /* Fill the item sets. */
- graph->markReachableFromHereStopFinal( toState );
- for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
- if ( ms->stateBits & STB_ISMARKED ) {
- ms->lmItemSet.insert( lmAct->value );
- ms->stateBits &= ~ STB_ISMARKED;
- }
- }
- }
- }
- }
- }
-
- /* The lmItem sets are now filled, telling us which longest match rules
- * can succeed in which states. First determine if we need to make sure
- * act is defaulted to zero. We need to do this if there are any states
- * with lmItemSet.length() > 1 and NULL is included. That is, that the
- * switch may get called when in fact nothing has been matched. */
- int maxItemSetLength = 0;
- graph->markReachableFromHereStopFinal( graph->startState );
- for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
- if ( ms->stateBits & STB_ISMARKED ) {
- if ( ms->lmItemSet.length() > maxItemSetLength )
- maxItemSetLength = ms->lmItemSet.length();
- ms->stateBits &= ~ STB_ISMARKED;
- }
- }
-
- /* The actions executed on starting to match a token. */
- graph->isolateStartState();
- graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
- graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
- if ( maxItemSetLength > 1 ) {
- /* The longest match action switch may be called when tokens are
- * matched, in which case act must be initialized, there must be a
- * case to handle the error, and the generated machine will require an
- * error state. */
- lmSwitchHandlesError = true;
- pd->lmRequiresErrorState = true;
- graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
- }
-
- /* The place to store transitions to restart. It maybe possible for the
- * restarting to affect the searching through the graph that follows. For
- * now take the safe route and save the list of transitions to restart
- * until after all searching is done. */
- Vector<TransAp*> restartTrans;
-
- /* Set actions that do immediate token recognition, set the longest match part
- * id and set the token ending. */
- for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
- for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
- if ( trans->lmActionTable.length() > 0 ) {
- LmActionTableEl *lmAct = trans->lmActionTable.data;
- StateAp *toState = trans->toState;
- assert( toState );
-
- /* Can only optimize this if there are no transitions out.
- * Note there can be out transitions going nowhere with
- * actions and they too must inhibit this optimization. */
- if ( toState->outList.length() == 0 ) {
- /* Can execute the immediate action for the longest match
- * part. Redirect the action to the start state.
- *
- * NOTE: When we need to inhibit on_last due to leaving
- * actions the above test suffices. If the state has out
- * actions then it will fail because the out action will
- * have been transferred to an error transition, which
- * makes the outlist non-empty. */
- trans->actionTable.setAction( lmAct->key,
- lmAct->value->actOnLast );
- restartTrans.append( trans );
- }
- else {
- /* Look for non final states that have a non-empty item
- * set. If these are present then we need to record the
- * end of the token. Also Find the highest item set
- * length reachable from here (excluding at transtions to
- * final states). */
- bool nonFinalNonEmptyItemSet = false;
- maxItemSetLength = 0;
- graph->markReachableFromHereStopFinal( toState );
- for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
- if ( ms->stateBits & STB_ISMARKED ) {
- if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
- nonFinalNonEmptyItemSet = true;
- if ( ms->lmItemSet.length() > maxItemSetLength )
- maxItemSetLength = ms->lmItemSet.length();
- ms->stateBits &= ~ STB_ISMARKED;
- }
- }
-
- /* If there are reachable states that are not final and
- * have non empty item sets or that have an item set
- * length greater than one then we need to set tokend
- * because the error action that matches the token will
- * require it. */
- if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
- trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
-
- /* Some states may not know which longest match item to
- * execute, must set it. */
- if ( maxItemSetLength > 1 ) {
- /* There are transitions out, another match may come. */
- trans->actionTable.setAction( lmAct->key,
- lmAct->value->setActId );
- }
- }
- }
- }
- }
-
- /* Now that all graph searching is done it certainly safe set the
- * restarting. It may be safe above, however this must be verified. */
- for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
- restart( graph, *pt );
-
- int lmErrActionOrd = pd->curActionOrd++;
-
- /* Embed the error for recognizing a char. */
- for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
- if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
- if ( st->isFinState() ) {
- /* On error execute the onActNext action, which knows that
- * the last character of the token was one back and restart. */
- graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
- &st->lmItemSet[0]->actOnNext, 1 );
- st->eofActionTable.setAction( lmErrActionOrd,
- st->lmItemSet[0]->actOnNext );
- st->eofTarget = graph->startState;
- }
- else {
- graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
- &st->lmItemSet[0]->actLagBehind, 1 );
- st->eofActionTable.setAction( lmErrActionOrd,
- st->lmItemSet[0]->actLagBehind );
- st->eofTarget = graph->startState;
- }
- }
- else if ( st->lmItemSet.length() > 1 ) {
- /* Need to use the select. Take note of which items the select
- * is needed for so only the necessary actions are included. */
- for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
- if ( *plmi != 0 )
- (*plmi)->inLmSelect = true;
- }
- /* On error, execute the action select and go to the start state. */
- graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
- &lmActSelect, 1 );
- st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
- st->eofTarget = graph->startState;
- }
- }
-
- /* Finally, the start state should be made final. */
- graph->setFinState( graph->startState );
-}
-
-void LongestMatch::transferScannerLeavingActions( FsmAp *graph )
-{
- for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
- if ( st->outActionTable.length() > 0 )
- graph->setErrorActions( st, st->outActionTable );
- }
-}
-
-FsmAp *LongestMatch::walk( ParseData *pd )
-{
- /* The longest match has it's own name scope. */
- NameFrame nameFrame = pd->enterNameScope( true, 1 );
-
- /* Make each part of the longest match. */
- FsmAp **parts = new FsmAp*[longestMatchList->length()];
- LmPartList::Iter lmi = *longestMatchList;
- for ( int i = 0; lmi.lte(); lmi++, i++ ) {
- /* Create the machine and embed the setting of the longest match id. */
- parts[i] = lmi->join->walk( pd );
- parts[i]->longMatchAction( pd->curActionOrd++, lmi );
- }
-
- /* Before we union the patterns we need to deal with leaving actions. They
- * are transfered to error transitions out of the final states (like local
- * error actions) and to eof actions. In the scanner we need to forbid
- * on_last for any final state that has an leaving action. */
- for ( int i = 0; i < longestMatchList->length(); i++ )
- transferScannerLeavingActions( parts[i] );
-
- /* Union machines one and up with machine zero. The grammar dictates that
- * there will always be at least one part. */
- FsmAp *rtnVal = parts[0];
- for ( int i = 1; i < longestMatchList->length(); i++ ) {
- rtnVal->unionOp( parts[i] );
- afterOpMinimize( rtnVal );
- }
-
- runLongestMatch( pd, rtnVal );
-
- /* Pop the name scope. */
- pd->popNameScope( nameFrame );
-
- delete[] parts;
- return rtnVal;
-}
-
-FsmAp *MachineDef::walk( ParseData *pd )
-{
- FsmAp *rtnVal = 0;
- switch ( type ) {
- case JoinType:
- rtnVal = join->walk( pd );
- break;
- case LongestMatchType:
- rtnVal = longestMatch->walk( pd );
- break;
- case LengthDefType:
- condData->lastCondKey.increment();
- rtnVal = new FsmAp();
- rtnVal->concatFsm( condData->lastCondKey );
- break;
- }
- return rtnVal;
-}
-
-void MachineDef::makeNameTree( ParseData *pd )
-{
- switch ( type ) {
- case JoinType:
- join->makeNameTree( pd );
- break;
- case LongestMatchType:
- longestMatch->makeNameTree( pd );
- break;
- case LengthDefType:
- break;
- }
-}
-
-void MachineDef::resolveNameRefs( ParseData *pd )
-{
- switch ( type ) {
- case JoinType:
- join->resolveNameRefs( pd );
- break;
- case LongestMatchType:
- longestMatch->resolveNameRefs( pd );
- break;
- case LengthDefType:
- break;
- }
-}
-
-
-/* Construct with a location and the first expression. */
-Join::Join( const InputLoc &loc, Expression *expr )
-:
- loc(loc)
-{
- exprList.append( expr );
-}
-
-/* Construct with a location and the first expression. */
-Join::Join( Expression *expr )
-{
- exprList.append( expr );
-}
-
-/* Walk an expression node. */
-FsmAp *Join::walk( ParseData *pd )
-{
- if ( exprList.length() > 1 )
- return walkJoin( pd );
- else
- return exprList.head->walk( pd );
-}
-
-/* There is a list of expressions to join. */
-FsmAp *Join::walkJoin( ParseData *pd )
-{
- /* We enter into a new name scope. */
- NameFrame nameFrame = pd->enterNameScope( true, 1 );
-
- /* Evaluate the machines. */
- FsmAp **fsms = new FsmAp*[exprList.length()];
- ExprList::Iter expr = exprList;
- for ( int e = 0; e < exprList.length(); e++, expr++ )
- fsms[e] = expr->walk( pd );
-
- /* Get the start and final names. Final is
- * guaranteed to exist, start is not. */
- NameInst *startName = pd->curNameInst->start;
- NameInst *finalName = pd->curNameInst->final;
-
- int startId = -1;
- if ( startName != 0 ) {
- /* Take note that there was an implicit link to the start machine. */
- pd->localNameScope->referencedNames.append( startName );
- startId = startName->id;
- }
-
- /* A final id of -1 indicates there is no epsilon that references the
- * final state, therefor do not create one or set an entry point to it. */
- int finalId = -1;
- if ( finalName->numRefs > 0 )
- finalId = finalName->id;
-
- /* Join machines 1 and up onto machine 0. */
- FsmAp *retFsm = fsms[0];
- retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
-
- /* We can now unset entry points that are not longer used. */
- pd->unsetObsoleteEntries( retFsm );
-
- /* Pop the name scope. */
- pd->popNameScope( nameFrame );
-
- delete[] fsms;
- return retFsm;
-}
-
-void Join::makeNameTree( ParseData *pd )
-{
- if ( exprList.length() > 1 ) {
- /* Create the new anonymous scope. */
- NameInst *prevNameInst = pd->curNameInst;
- pd->curNameInst = pd->addNameInst( loc, 0, false );
-
- /* Join scopes need an implicit "final" target. */
- pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
- pd->nextNameId++, false );
-
- /* Recurse into all expressions in the list. */
- for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
- expr->makeNameTree( pd );
-
- /* The name scope ends, pop the name instantiation. */
- pd->curNameInst = prevNameInst;
- }
- else {
- /* Recurse into the single expression. */
- exprList.head->makeNameTree( pd );
- }
-}
-
-
-void Join::resolveNameRefs( ParseData *pd )
-{
- /* Branch on whether or not there is to be a join. */
- if ( exprList.length() > 1 ) {
- /* The variable definition enters a new scope. */
- NameFrame nameFrame = pd->enterNameScope( true, 1 );
-
- /* The join scope must contain a start label. */
- NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
- if ( resolved.length() > 0 ) {
- /* Take the first. */
- pd->curNameInst->start = resolved[0];
- if ( resolved.length() > 1 ) {
- /* Complain about the multiple references. */
- error(loc) << "join operation has multiple start labels" << endl;
- errorStateLabels( resolved );
- }
- }
-
- /* Make sure there is a start label. */
- if ( pd->curNameInst->start != 0 ) {
- /* There is an implicit reference to start name. */
- pd->curNameInst->start->numRefs += 1;
- }
- else {
- /* No start label. */
- error(loc) << "join operation has no start label" << endl;
- }
-
- /* Recurse into all expressions in the list. */
- for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
- expr->resolveNameRefs( pd );
-
- /* The name scope ends, pop the name instantiation. */
- pd->popNameScope( nameFrame );
- }
- else {
- /* Recurse into the single expression. */
- exprList.head->resolveNameRefs( pd );
- }
-}
-
-/* Clean up after an expression node. */
-Expression::~Expression()
-{
- switch ( type ) {
- case OrType: case IntersectType: case SubtractType:
- case StrongSubtractType:
- delete expression;
- delete term;
- break;
- case TermType:
- delete term;
- break;
- case BuiltinType:
- break;
- }
-}
-
-/* Evaluate a single expression node. */
-FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
-{
- FsmAp *rtnVal = 0;
- switch ( type ) {
- case OrType: {
- /* Evaluate the expression. */
- rtnVal = expression->walk( pd, false );
- /* Evaluate the term. */
- FsmAp *rhs = term->walk( pd );
- /* Perform union. */
- rtnVal->unionOp( rhs );
- afterOpMinimize( rtnVal, lastInSeq );
- break;
- }
- case IntersectType: {
- /* Evaluate the expression. */
- rtnVal = expression->walk( pd );
- /* Evaluate the term. */
- FsmAp *rhs = term->walk( pd );
- /* Perform intersection. */
- rtnVal->intersectOp( rhs );
- afterOpMinimize( rtnVal, lastInSeq );
- break;
- }
- case SubtractType: {
- /* Evaluate the expression. */
- rtnVal = expression->walk( pd );
- /* Evaluate the term. */
- FsmAp *rhs = term->walk( pd );
- /* Perform subtraction. */
- rtnVal->subtractOp( rhs );
- afterOpMinimize( rtnVal, lastInSeq );
- break;
- }
- case StrongSubtractType: {
- /* Evaluate the expression. */
- rtnVal = expression->walk( pd );
-
- /* Evaluate the term and pad it with any* machines. */
- FsmAp *rhs = dotStarFsm( pd );
- FsmAp *termFsm = term->walk( pd );
- FsmAp *trailAnyStar = dotStarFsm( pd );
- rhs->concatOp( termFsm );
- rhs->concatOp( trailAnyStar );
-
- /* Perform subtraction. */
- rtnVal->subtractOp( rhs );
- afterOpMinimize( rtnVal, lastInSeq );
- break;
- }
- case TermType: {
- /* Return result of the term. */
- rtnVal = term->walk( pd );
- break;
- }
- case BuiltinType: {
- /* Duplicate the builtin. */
- rtnVal = makeBuiltin( builtin, pd );
- break;
- }
- }
-
- return rtnVal;
-}
-
-void Expression::makeNameTree( ParseData *pd )
-{
- switch ( type ) {
- case OrType:
- case IntersectType:
- case SubtractType:
- case StrongSubtractType:
- expression->makeNameTree( pd );
- term->makeNameTree( pd );
- break;
- case TermType:
- term->makeNameTree( pd );
- break;
- case BuiltinType:
- break;
- }
-}
-
-void Expression::resolveNameRefs( ParseData *pd )
-{
- switch ( type ) {
- case OrType:
- case IntersectType:
- case SubtractType:
- case StrongSubtractType:
- expression->resolveNameRefs( pd );
- term->resolveNameRefs( pd );
- break;
- case TermType:
- term->resolveNameRefs( pd );
- break;
- case BuiltinType:
- break;
- }
-}
-
-/* Clean up after a term node. */
-Term::~Term()
-{
- switch ( type ) {
- case ConcatType:
- case RightStartType:
- case RightFinishType:
- case LeftType:
- delete term;
- delete factorWithAug;
- break;
- case FactorWithAugType:
- delete factorWithAug;
- break;
- }
-}
-
-/* Evaluate a term node. */
-FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
-{
- FsmAp *rtnVal = 0;
- switch ( type ) {
- case ConcatType: {
- /* Evaluate the Term. */
- rtnVal = term->walk( pd, false );
- /* Evaluate the FactorWithRep. */
- FsmAp *rhs = factorWithAug->walk( pd );
- /* Perform concatenation. */
- rtnVal->concatOp( rhs );
- afterOpMinimize( rtnVal, lastInSeq );
- break;
- }
- case RightStartType: {
- /* Evaluate the Term. */
- rtnVal = term->walk( pd );
-
- /* Evaluate the FactorWithRep. */
- FsmAp *rhs = factorWithAug->walk( pd );
-
- /* Set up the priority descriptors. The left machine gets the
- * lower priority where as the right get the higher start priority. */
- priorDescs[0].key = pd->nextPriorKey++;
- priorDescs[0].priority = 0;
- rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
-
- /* The start transitions of the right machine gets the higher
- * priority. Use the same unique key. */
- priorDescs[1].key = priorDescs[0].key;
- priorDescs[1].priority = 1;
- rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
-
- /* Perform concatenation. */
- rtnVal->concatOp( rhs );
- afterOpMinimize( rtnVal, lastInSeq );
- break;
- }
- case RightFinishType: {
- /* Evaluate the Term. */
- rtnVal = term->walk( pd );
-
- /* Evaluate the FactorWithRep. */
- FsmAp *rhs = factorWithAug->walk( pd );
-
- /* Set up the priority descriptors. The left machine gets the
- * lower priority where as the finishing transitions to the right
- * get the higher priority. */
- priorDescs[0].key = pd->nextPriorKey++;
- priorDescs[0].priority = 0;
- rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
-
- /* The finishing transitions of the right machine get the higher
- * priority. Use the same unique key. */
- priorDescs[1].key = priorDescs[0].key;
- priorDescs[1].priority = 1;
- rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
-
- /* If the right machine's start state is final we need to guard
- * against the left machine persisting by moving through the empty
- * string. */
- if ( rhs->startState->isFinState() ) {
- rhs->startState->outPriorTable.setPrior(
- pd->curPriorOrd++, &priorDescs[1] );
- }
-
- /* Perform concatenation. */
- rtnVal->concatOp( rhs );
- afterOpMinimize( rtnVal, lastInSeq );
- break;
- }
- case LeftType: {
- /* Evaluate the Term. */
- rtnVal = term->walk( pd );
-
- /* Evaluate the FactorWithRep. */
- FsmAp *rhs = factorWithAug->walk( pd );
-
- /* Set up the priority descriptors. The left machine gets the
- * higher priority. */
- priorDescs[0].key = pd->nextPriorKey++;
- priorDescs[0].priority = 1;
- rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
-
- /* The right machine gets the lower priority. We cannot use
- * allTransPrior here in case the start state of the right machine
- * is final. It would allow the right machine thread to run along
- * with the left if just passing through the start state. Using
- * startFsmPrior prevents this. */
- priorDescs[1].key = priorDescs[0].key;
- priorDescs[1].priority = 0;
- rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
-
- /* Perform concatenation. */
- rtnVal->concatOp( rhs );
- afterOpMinimize( rtnVal, lastInSeq );
- break;
- }
- case FactorWithAugType: {
- rtnVal = factorWithAug->walk( pd );
- break;
- }
- }
- return rtnVal;
-}
-
-void Term::makeNameTree( ParseData *pd )
-{
- switch ( type ) {
- case ConcatType:
- case RightStartType:
- case RightFinishType:
- case LeftType:
- term->makeNameTree( pd );
- factorWithAug->makeNameTree( pd );
- break;
- case FactorWithAugType:
- factorWithAug->makeNameTree( pd );
- break;
- }
-}
-
-void Term::resolveNameRefs( ParseData *pd )
-{
- switch ( type ) {
- case ConcatType:
- case RightStartType:
- case RightFinishType:
- case LeftType:
- term->resolveNameRefs( pd );
- factorWithAug->resolveNameRefs( pd );
- break;
- case FactorWithAugType:
- factorWithAug->resolveNameRefs( pd );
- break;
- }
-}
-
-/* Clean up after a factor with augmentation node. */
-FactorWithAug::~FactorWithAug()
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <iostream>
+#include <iomanip>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+
+/* Parsing. */
+#include "ragel.h"
+#include "rlparse.h"
+#include "parsetree.h"
+
+using namespace std;
+ostream &operator<<( ostream &out, const NameRef &nameRef );
+ostream &operator<<( ostream &out, const NameInst &nameInst );
+
+/* Convert the literal string which comes in from the scanner into an array of
+ * characters with escapes and options interpreted. Also null terminates the
+ * string. Though this null termination should not be relied on for
+ * interpreting literals in the parser because the string may contain \0 */
+char *prepareLitString( const InputLoc &loc, const char *data, long length,
+ long &resLen, bool &caseInsensitive )
+{
+ char *resData = new char[length+1];
+ caseInsensitive = false;
+
+ const char *src = data + 1;
+ const char *end = data + length - 1;
+
+ while ( *end != '\'' && *end != '\"' ) {
+ if ( *end == 'i' )
+ caseInsensitive = true;
+ else {
+ error( loc ) << "literal string '" << *end <<
+ "' option not supported" << endl;
+ }
+ end -= 1;
+ }
+
+ char *dest = resData;
+ long len = 0;
+ while ( src != end ) {
+ if ( *src == '\\' ) {
+ switch ( src[1] ) {
+ case '0': dest[len++] = '\0'; break;
+ case 'a': dest[len++] = '\a'; break;
+ case 'b': dest[len++] = '\b'; break;
+ case 't': dest[len++] = '\t'; break;
+ case 'n': dest[len++] = '\n'; break;
+ case 'v': dest[len++] = '\v'; break;
+ case 'f': dest[len++] = '\f'; break;
+ case 'r': dest[len++] = '\r'; break;
+ case '\n': break;
+ default: dest[len++] = src[1]; break;
+ }
+ src += 2;
+ }
+ else {
+ dest[len++] = *src++;
+ }
+ }
+
+ resLen = len;
+ resData[resLen] = 0;
+ return resData;
+}
+
+FsmAp *VarDef::walk( ParseData *pd )
+{
+ /* We enter into a new name scope. */
+ NameFrame nameFrame = pd->enterNameScope( true, 1 );
+
+ /* Recurse on the expression. */
+ FsmAp *rtnVal = machineDef->walk( pd );
+
+ /* Do the tranfer of local error actions. */
+ LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
+ if ( localErrDictEl != 0 ) {
+ for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
+ rtnVal->transferErrorActions( state, localErrDictEl->value );
+ }
+
+ /* If the expression below is a join operation with multiple expressions
+ * then it just had epsilon transisions resolved. If it is a join
+ * with only a single expression then run the epsilon op now. */
+ if ( machineDef->type == MachineDef::JoinType && machineDef->join->exprList.length() == 1 )
+ rtnVal->epsilonOp();
+
+ /* We can now unset entry points that are not longer used. */
+ pd->unsetObsoleteEntries( rtnVal );
+
+ /* If the name of the variable is referenced then add the entry point to
+ * the graph. */
+ if ( pd->curNameInst->numRefs > 0 )
+ rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
+
+ /* Pop the name scope. */
+ pd->popNameScope( nameFrame );
+ return rtnVal;
+}
+
+void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
+{
+ /* The variable definition enters a new scope. */
+ NameInst *prevNameInst = pd->curNameInst;
+ pd->curNameInst = pd->addNameInst( loc, name, false );
+
+ if ( machineDef->type == MachineDef::LongestMatchType )
+ pd->curNameInst->isLongestMatch = true;
+
+ /* Recurse. */
+ machineDef->makeNameTree( pd );
+
+ /* The name scope ends, pop the name instantiation. */
+ pd->curNameInst = prevNameInst;
+}
+
+void VarDef::resolveNameRefs( ParseData *pd )
+{
+ /* Entering into a new scope. */
+ NameFrame nameFrame = pd->enterNameScope( true, 1 );
+
+ /* Recurse. */
+ machineDef->resolveNameRefs( pd );
+
+ /* The name scope ends, pop the name instantiation. */
+ pd->popNameScope( nameFrame );
+}
+
+InputLoc LongestMatchPart::getLoc()
{
- delete factorWithRep;
-
- /* Walk the vector of parser actions, deleting function names. */
-
- /* Clean up priority descriptors. */
- if ( priorDescs != 0 )
- delete[] priorDescs;
-}
-
-void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
-{
- /* Assign actions. */
- for ( int i = 0; i < actions.length(); i++ ) {
- switch ( actions[i].type ) {
- /* Transition actions. */
- case at_start:
- graph->startFsmAction( actionOrd[i], actions[i].action );
- afterOpMinimize( graph );
- break;
- case at_all:
- graph->allTransAction( actionOrd[i], actions[i].action );
- break;
- case at_finish:
- graph->finishFsmAction( actionOrd[i], actions[i].action );
- break;
- case at_leave:
- graph->leaveFsmAction( actionOrd[i], actions[i].action );
- break;
-
- /* Global error actions. */
- case at_start_gbl_error:
- graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
- afterOpMinimize( graph );
- break;
- case at_all_gbl_error:
- graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
- break;
- case at_final_gbl_error:
- graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
- break;
- case at_not_start_gbl_error:
- graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
- break;
- case at_not_final_gbl_error:
- graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
- break;
- case at_middle_gbl_error:
- graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
- break;
-
- /* Local error actions. */
- case at_start_local_error:
- graph->startErrorAction( actionOrd[i], actions[i].action,
- actions[i].localErrKey );
- afterOpMinimize( graph );
- break;
- case at_all_local_error:
- graph->allErrorAction( actionOrd[i], actions[i].action,
- actions[i].localErrKey );
- break;
- case at_final_local_error:
- graph->finalErrorAction( actionOrd[i], actions[i].action,
- actions[i].localErrKey );
- break;
- case at_not_start_local_error:
- graph->notStartErrorAction( actionOrd[i], actions[i].action,
- actions[i].localErrKey );
- break;
- case at_not_final_local_error:
- graph->notFinalErrorAction( actionOrd[i], actions[i].action,
- actions[i].localErrKey );
- break;
- case at_middle_local_error:
- graph->middleErrorAction( actionOrd[i], actions[i].action,
- actions[i].localErrKey );
- break;
-
- /* EOF actions. */
- case at_start_eof:
- graph->startEOFAction( actionOrd[i], actions[i].action );
- afterOpMinimize( graph );
- break;
- case at_all_eof:
- graph->allEOFAction( actionOrd[i], actions[i].action );
- break;
- case at_final_eof:
- graph->finalEOFAction( actionOrd[i], actions[i].action );
- break;
- case at_not_start_eof:
- graph->notStartEOFAction( actionOrd[i], actions[i].action );
- break;
- case at_not_final_eof:
- graph->notFinalEOFAction( actionOrd[i], actions[i].action );
- break;
- case at_middle_eof:
- graph->middleEOFAction( actionOrd[i], actions[i].action );
- break;
-
- /* To State Actions. */
- case at_start_to_state:
- graph->startToStateAction( actionOrd[i], actions[i].action );
- afterOpMinimize( graph );
- break;
- case at_all_to_state:
- graph->allToStateAction( actionOrd[i], actions[i].action );
- break;
- case at_final_to_state:
- graph->finalToStateAction( actionOrd[i], actions[i].action );
- break;
- case at_not_start_to_state:
- graph->notStartToStateAction( actionOrd[i], actions[i].action );
- break;
- case at_not_final_to_state:
- graph->notFinalToStateAction( actionOrd[i], actions[i].action );
- break;
- case at_middle_to_state:
- graph->middleToStateAction( actionOrd[i], actions[i].action );
- break;
-
- /* From State Actions. */
- case at_start_from_state:
- graph->startFromStateAction( actionOrd[i], actions[i].action );
- afterOpMinimize( graph );
- break;
- case at_all_from_state:
- graph->allFromStateAction( actionOrd[i], actions[i].action );
- break;
- case at_final_from_state:
- graph->finalFromStateAction( actionOrd[i], actions[i].action );
- break;
- case at_not_start_from_state:
- graph->notStartFromStateAction( actionOrd[i], actions[i].action );
- break;
- case at_not_final_from_state:
- graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
- break;
- case at_middle_from_state:
- graph->middleFromStateAction( actionOrd[i], actions[i].action );
- break;
-
- /* Remaining cases, prevented by the parser. */
- default:
- assert( false );
- break;
- }
- }
-}
-
-void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
-{
- /* Assign priorities. */
- for ( int i = 0; i < priorityAugs.length(); i++ ) {
- switch ( priorityAugs[i].type ) {
- case at_start:
- graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
- /* Start fsm priorities are a special case that may require
- * minimization afterwards. */
- afterOpMinimize( graph );
- break;
- case at_all:
- graph->allTransPrior( priorOrd[i], &priorDescs[i] );
- break;
- case at_finish:
- graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
- break;
- case at_leave:
- graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
- break;
-
- default:
- /* Parser Prevents this case. */
- break;
- }
- }
-}
-
-void FactorWithAug::assignConditions( FsmAp *graph )
-{
- for ( int i = 0; i < conditions.length(); i++ ) {
- switch ( conditions[i].type ) {
- /* Transition actions. */
- case at_start:
- graph->startFsmCondition( conditions[i].action, conditions[i].sense );
- afterOpMinimize( graph );
- break;
- case at_all:
- graph->allTransCondition( conditions[i].action, conditions[i].sense );
- break;
- case at_leave:
- graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
- break;
+ return action != 0 ? action->loc : semiLoc;
+}
+
+/*
+ * If there are any LMs then all of the following entry points must reset
+ * tokstart:
+ *
+ * 1. fentry(StateRef)
+ * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
+ * 3. targt of any transition that has an fcall (the return loc).
+ * 4. start state of all longest match routines.
+ */
+
+Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
+ const char *name, InlineList *inlineList )
+{
+ Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
+ action->actionRefs.append( pd->curNameInst );
+ pd->actionList.append( action );
+ action->isLmAction = true;
+ return action;
+}
+
+void LongestMatch::makeActions( ParseData *pd )
+{
+ /* Make actions that set the action id. */
+ for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
+ /* For each part create actions for setting the match type. We need
+ * to do this so that the actions will go into the actionIndex. */
+ InlineList *inlineList = new InlineList;
+ inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
+ InlineItem::LmSetActId ) );
+ char *actName = new char[50];
+ sprintf( actName, "store%i", lmi->longestMatchId );
+ lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
+ }
+
+ /* Make actions that execute the user action and restart on the last
+ * character. */
+ for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
+ /* For each part create actions for setting the match type. We need
+ * to do this so that the actions will go into the actionIndex. */
+ InlineList *inlineList = new InlineList;
+ inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
+ InlineItem::LmOnLast ) );
+ char *actName = new char[50];
+ sprintf( actName, "last%i", lmi->longestMatchId );
+ lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
+ }
+
+ /* Make actions that execute the user action and restart on the next
+ * character. These actions will set tokend themselves (it is the current
+ * char). */
+ for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
+ /* For each part create actions for setting the match type. We need
+ * to do this so that the actions will go into the actionIndex. */
+ InlineList *inlineList = new InlineList;
+ inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
+ InlineItem::LmOnNext ) );
+ char *actName = new char[50];
+ sprintf( actName, "next%i", lmi->longestMatchId );
+ lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
+ }
+
+ /* Make actions that execute the user action and restart at tokend. These
+ * actions execute some time after matching the last char. */
+ for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
+ /* For each part create actions for setting the match type. We need
+ * to do this so that the actions will go into the actionIndex. */
+ InlineList *inlineList = new InlineList;
+ inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
+ InlineItem::LmOnLagBehind ) );
+ char *actName = new char[50];
+ sprintf( actName, "lag%i", lmi->longestMatchId );
+ lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
+ }
+
+ InputLoc loc;
+ loc.line = 1;
+ loc.col = 1;
+ loc.fileName = "NONE";
+
+ /* Create the error action. */
+ InlineList *il6 = new InlineList;
+ il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
+ lmActSelect = newAction( pd, loc, "switch", il6 );
+}
+
+void LongestMatch::findName( ParseData *pd )
+{
+ NameInst *nameInst = pd->curNameInst;
+ while ( nameInst->name == 0 ) {
+ nameInst = nameInst->parent;
+ /* Since every machine must must have a name, we should always find a
+ * name for the longest match. */
+ assert( nameInst != 0 );
+ }
+ name = nameInst->name;
+}
+
+void LongestMatch::makeNameTree( ParseData *pd )
+{
+ /* Create an anonymous scope for the longest match. Will be used for
+ * restarting machine after matching a token. */
+ NameInst *prevNameInst = pd->curNameInst;
+ pd->curNameInst = pd->addNameInst( loc, 0, false );
+
+ /* Recurse into all parts of the longest match operator. */
+ for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
+ lmi->join->makeNameTree( pd );
+
+ /* Traverse the name tree upwards to find a name for this lm. */
+ findName( pd );
+
+ /* Also make the longest match's actions at this point. */
+ makeActions( pd );
+
+ /* The name scope ends, pop the name instantiation. */
+ pd->curNameInst = prevNameInst;
+}
+
+void LongestMatch::resolveNameRefs( ParseData *pd )
+{
+ /* The longest match gets its own name scope. */
+ NameFrame nameFrame = pd->enterNameScope( true, 1 );
+
+ /* Take an action reference for each longest match item and recurse. */
+ for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
+ /* Record the reference if the item has an action. */
+ if ( lmi->action != 0 )
+ lmi->action->actionRefs.append( pd->localNameScope );
+
+ /* Recurse down the join. */
+ lmi->join->resolveNameRefs( pd );
+ }
+
+ /* The name scope ends, pop the name instantiation. */
+ pd->popNameScope( nameFrame );
+}
+
+void LongestMatch::restart( FsmAp *graph, TransAp *trans )
+{
+ StateAp *fromState = trans->fromState;
+ graph->detachTrans( fromState, trans->toState, trans );
+ graph->attachTrans( fromState, graph->startState, trans );
+}
+
+void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
+{
+ graph->markReachableFromHereStopFinal( graph->startState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & STB_ISMARKED ) {
+ ms->lmItemSet.insert( 0 );
+ ms->stateBits &= ~ STB_ISMARKED;
+ }
+ }
+
+ /* Transfer the first item of non-empty lmAction tables to the item sets
+ * of the states that follow. Exclude states that have no transitions out.
+ * This must happen on a separate pass so that on each iteration of the
+ * next pass we have the item set entries from all lmAction tables. */
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
+ if ( trans->lmActionTable.length() > 0 ) {
+ LmActionTableEl *lmAct = trans->lmActionTable.data;
+ StateAp *toState = trans->toState;
+ assert( toState );
+
+ /* Can only optimize this if there are no transitions out.
+ * Note there can be out transitions going nowhere with
+ * actions and they too must inhibit this optimization. */
+ if ( toState->outList.length() > 0 ) {
+ /* Fill the item sets. */
+ graph->markReachableFromHereStopFinal( toState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & STB_ISMARKED ) {
+ ms->lmItemSet.insert( lmAct->value );
+ ms->stateBits &= ~ STB_ISMARKED;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /* The lmItem sets are now filled, telling us which longest match rules
+ * can succeed in which states. First determine if we need to make sure
+ * act is defaulted to zero. We need to do this if there are any states
+ * with lmItemSet.length() > 1 and NULL is included. That is, that the
+ * switch may get called when in fact nothing has been matched. */
+ int maxItemSetLength = 0;
+ graph->markReachableFromHereStopFinal( graph->startState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & STB_ISMARKED ) {
+ if ( ms->lmItemSet.length() > maxItemSetLength )
+ maxItemSetLength = ms->lmItemSet.length();
+ ms->stateBits &= ~ STB_ISMARKED;
+ }
+ }
+
+ /* The actions executed on starting to match a token. */
+ graph->isolateStartState();
+ graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
+ graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
+ if ( maxItemSetLength > 1 ) {
+ /* The longest match action switch may be called when tokens are
+ * matched, in which case act must be initialized, there must be a
+ * case to handle the error, and the generated machine will require an
+ * error state. */
+ lmSwitchHandlesError = true;
+ pd->lmRequiresErrorState = true;
+ graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
+ }
+
+ /* The place to store transitions to restart. It maybe possible for the
+ * restarting to affect the searching through the graph that follows. For
+ * now take the safe route and save the list of transitions to restart
+ * until after all searching is done. */
+ Vector<TransAp*> restartTrans;
+
+ /* Set actions that do immediate token recognition, set the longest match part
+ * id and set the token ending. */
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
+ if ( trans->lmActionTable.length() > 0 ) {
+ LmActionTableEl *lmAct = trans->lmActionTable.data;
+ StateAp *toState = trans->toState;
+ assert( toState );
+
+ /* Can only optimize this if there are no transitions out.
+ * Note there can be out transitions going nowhere with
+ * actions and they too must inhibit this optimization. */
+ if ( toState->outList.length() == 0 ) {
+ /* Can execute the immediate action for the longest match
+ * part. Redirect the action to the start state.
+ *
+ * NOTE: When we need to inhibit on_last due to leaving
+ * actions the above test suffices. If the state has out
+ * actions then it will fail because the out action will
+ * have been transferred to an error transition, which
+ * makes the outlist non-empty. */
+ trans->actionTable.setAction( lmAct->key,
+ lmAct->value->actOnLast );
+ restartTrans.append( trans );
+ }
+ else {
+ /* Look for non final states that have a non-empty item
+ * set. If these are present then we need to record the
+ * end of the token. Also Find the highest item set
+ * length reachable from here (excluding at transtions to
+ * final states). */
+ bool nonFinalNonEmptyItemSet = false;
+ maxItemSetLength = 0;
+ graph->markReachableFromHereStopFinal( toState );
+ for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
+ if ( ms->stateBits & STB_ISMARKED ) {
+ if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
+ nonFinalNonEmptyItemSet = true;
+ if ( ms->lmItemSet.length() > maxItemSetLength )
+ maxItemSetLength = ms->lmItemSet.length();
+ ms->stateBits &= ~ STB_ISMARKED;
+ }
+ }
+
+ /* If there are reachable states that are not final and
+ * have non empty item sets or that have an item set
+ * length greater than one then we need to set tokend
+ * because the error action that matches the token will
+ * require it. */
+ if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
+ trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
+
+ /* Some states may not know which longest match item to
+ * execute, must set it. */
+ if ( maxItemSetLength > 1 ) {
+ /* There are transitions out, another match may come. */
+ trans->actionTable.setAction( lmAct->key,
+ lmAct->value->setActId );
+ }
+ }
+ }
+ }
+ }
+
+ /* Now that all graph searching is done it certainly safe set the
+ * restarting. It may be safe above, however this must be verified. */
+ for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
+ restart( graph, *pt );
+
+ int lmErrActionOrd = pd->curActionOrd++;
+
+ /* Embed the error for recognizing a char. */
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
+ if ( st->isFinState() ) {
+ /* On error execute the onActNext action, which knows that
+ * the last character of the token was one back and restart. */
+ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
+ &st->lmItemSet[0]->actOnNext, 1 );
+ st->eofActionTable.setAction( lmErrActionOrd,
+ st->lmItemSet[0]->actOnNext );
+ st->eofTarget = graph->startState;
+ }
+ else {
+ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
+ &st->lmItemSet[0]->actLagBehind, 1 );
+ st->eofActionTable.setAction( lmErrActionOrd,
+ st->lmItemSet[0]->actLagBehind );
+ st->eofTarget = graph->startState;
+ }
+ }
+ else if ( st->lmItemSet.length() > 1 ) {
+ /* Need to use the select. Take note of which items the select
+ * is needed for so only the necessary actions are included. */
+ for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
+ if ( *plmi != 0 )
+ (*plmi)->inLmSelect = true;
+ }
+ /* On error, execute the action select and go to the start state. */
+ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
+ &lmActSelect, 1 );
+ st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
+ st->eofTarget = graph->startState;
+ }
+ }
+
+ /* Finally, the start state should be made final. */
+ graph->setFinState( graph->startState );
+}
+
+void LongestMatch::transferScannerLeavingActions( FsmAp *graph )
+{
+ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
+ if ( st->outActionTable.length() > 0 )
+ graph->setErrorActions( st, st->outActionTable );
+ }
+}
+
+FsmAp *LongestMatch::walk( ParseData *pd )
+{
+ /* The longest match has it's own name scope. */
+ NameFrame nameFrame = pd->enterNameScope( true, 1 );
+
+ /* Make each part of the longest match. */
+ FsmAp **parts = new FsmAp*[longestMatchList->length()];
+ LmPartList::Iter lmi = *longestMatchList;
+ for ( int i = 0; lmi.lte(); lmi++, i++ ) {
+ /* Create the machine and embed the setting of the longest match id. */
+ parts[i] = lmi->join->walk( pd );
+ parts[i]->longMatchAction( pd->curActionOrd++, lmi );
+ }
+
+ /* Before we union the patterns we need to deal with leaving actions. They
+ * are transfered to error transitions out of the final states (like local
+ * error actions) and to eof actions. In the scanner we need to forbid
+ * on_last for any final state that has an leaving action. */
+ for ( int i = 0; i < longestMatchList->length(); i++ )
+ transferScannerLeavingActions( parts[i] );
+
+ /* Union machines one and up with machine zero. The grammar dictates that
+ * there will always be at least one part. */
+ FsmAp *rtnVal = parts[0];
+ for ( int i = 1; i < longestMatchList->length(); i++ ) {
+ rtnVal->unionOp( parts[i] );
+ afterOpMinimize( rtnVal );
+ }
+
+ runLongestMatch( pd, rtnVal );
+
+ /* Pop the name scope. */
+ pd->popNameScope( nameFrame );
+
+ delete[] parts;
+ return rtnVal;
+}
+
+FsmAp *MachineDef::walk( ParseData *pd )
+{
+ FsmAp *rtnVal = 0;
+ switch ( type ) {
+ case JoinType:
+ rtnVal = join->walk( pd );
+ break;
+ case LongestMatchType:
+ rtnVal = longestMatch->walk( pd );
+ break;
+ case LengthDefType:
+ condData->lastCondKey.increment();
+ rtnVal = new FsmAp();
+ rtnVal->concatFsm( condData->lastCondKey );
+ break;
+ }
+ return rtnVal;
+}
+
+void MachineDef::makeNameTree( ParseData *pd )
+{
+ switch ( type ) {
+ case JoinType:
+ join->makeNameTree( pd );
+ break;
+ case LongestMatchType:
+ longestMatch->makeNameTree( pd );
+ break;
+ case LengthDefType:
+ break;
+ }
+}
+
+void MachineDef::resolveNameRefs( ParseData *pd )
+{
+ switch ( type ) {
+ case JoinType:
+ join->resolveNameRefs( pd );
+ break;
+ case LongestMatchType:
+ longestMatch->resolveNameRefs( pd );
+ break;
+ case LengthDefType:
+ break;
+ }
+}
+
+
+/* Construct with a location and the first expression. */
+Join::Join( const InputLoc &loc, Expression *expr )
+:
+ loc(loc)
+{
+ exprList.append( expr );
+}
+
+/* Construct with a location and the first expression. */
+Join::Join( Expression *expr )
+{
+ exprList.append( expr );
+}
+
+/* Walk an expression node. */
+FsmAp *Join::walk( ParseData *pd )
+{
+ if ( exprList.length() > 1 )
+ return walkJoin( pd );
+ else
+ return exprList.head->walk( pd );
+}
+
+/* There is a list of expressions to join. */
+FsmAp *Join::walkJoin( ParseData *pd )
+{
+ /* We enter into a new name scope. */
+ NameFrame nameFrame = pd->enterNameScope( true, 1 );
+
+ /* Evaluate the machines. */
+ FsmAp **fsms = new FsmAp*[exprList.length()];
+ ExprList::Iter expr = exprList;
+ for ( int e = 0; e < exprList.length(); e++, expr++ )
+ fsms[e] = expr->walk( pd );
+
+ /* Get the start and final names. Final is
+ * guaranteed to exist, start is not. */
+ NameInst *startName = pd->curNameInst->start;
+ NameInst *finalName = pd->curNameInst->final;
+
+ int startId = -1;
+ if ( startName != 0 ) {
+ /* Take note that there was an implicit link to the start machine. */
+ pd->localNameScope->referencedNames.append( startName );
+ startId = startName->id;
+ }
+
+ /* A final id of -1 indicates there is no epsilon that references the
+ * final state, therefor do not create one or set an entry point to it. */
+ int finalId = -1;
+ if ( finalName->numRefs > 0 )
+ finalId = finalName->id;
+
+ /* Join machines 1 and up onto machine 0. */
+ FsmAp *retFsm = fsms[0];
+ retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
+
+ /* We can now unset entry points that are not longer used. */
+ pd->unsetObsoleteEntries( retFsm );
+
+ /* Pop the name scope. */
+ pd->popNameScope( nameFrame );
+
+ delete[] fsms;
+ return retFsm;
+}
+
+void Join::makeNameTree( ParseData *pd )
+{
+ if ( exprList.length() > 1 ) {
+ /* Create the new anonymous scope. */
+ NameInst *prevNameInst = pd->curNameInst;
+ pd->curNameInst = pd->addNameInst( loc, 0, false );
+
+ /* Join scopes need an implicit "final" target. */
+ pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
+ pd->nextNameId++, false );
+
+ /* Recurse into all expressions in the list. */
+ for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
+ expr->makeNameTree( pd );
+
+ /* The name scope ends, pop the name instantiation. */
+ pd->curNameInst = prevNameInst;
+ }
+ else {
+ /* Recurse into the single expression. */
+ exprList.head->makeNameTree( pd );
+ }
+}
+
+
+void Join::resolveNameRefs( ParseData *pd )
+{
+ /* Branch on whether or not there is to be a join. */
+ if ( exprList.length() > 1 ) {
+ /* The variable definition enters a new scope. */
+ NameFrame nameFrame = pd->enterNameScope( true, 1 );
+
+ /* The join scope must contain a start label. */
+ NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
+ if ( resolved.length() > 0 ) {
+ /* Take the first. */
+ pd->curNameInst->start = resolved[0];
+ if ( resolved.length() > 1 ) {
+ /* Complain about the multiple references. */
+ error(loc) << "join operation has multiple start labels" << endl;
+ errorStateLabels( resolved );
+ }
+ }
+
+ /* Make sure there is a start label. */
+ if ( pd->curNameInst->start != 0 ) {
+ /* There is an implicit reference to start name. */
+ pd->curNameInst->start->numRefs += 1;
+ }
+ else {
+ /* No start label. */
+ error(loc) << "join operation has no start label" << endl;
+ }
+
+ /* Recurse into all expressions in the list. */
+ for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
+ expr->resolveNameRefs( pd );
+
+ /* The name scope ends, pop the name instantiation. */
+ pd->popNameScope( nameFrame );
+ }
+ else {
+ /* Recurse into the single expression. */
+ exprList.head->resolveNameRefs( pd );
+ }
+}
+
+/* Clean up after an expression node. */
+Expression::~Expression()
+{
+ switch ( type ) {
+ case OrType: case IntersectType: case SubtractType:
+ case StrongSubtractType:
+ delete expression;
+ delete term;
+ break;
+ case TermType:
+ delete term;
+ break;
+ case BuiltinType:
+ break;
+ }
+}
+
+/* Evaluate a single expression node. */
+FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
+{
+ FsmAp *rtnVal = 0;
+ switch ( type ) {
+ case OrType: {
+ /* Evaluate the expression. */
+ rtnVal = expression->walk( pd, false );
+ /* Evaluate the term. */
+ FsmAp *rhs = term->walk( pd );
+ /* Perform union. */
+ rtnVal->unionOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case IntersectType: {
+ /* Evaluate the expression. */
+ rtnVal = expression->walk( pd );
+ /* Evaluate the term. */
+ FsmAp *rhs = term->walk( pd );
+ /* Perform intersection. */
+ rtnVal->intersectOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case SubtractType: {
+ /* Evaluate the expression. */
+ rtnVal = expression->walk( pd );
+ /* Evaluate the term. */
+ FsmAp *rhs = term->walk( pd );
+ /* Perform subtraction. */
+ rtnVal->subtractOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case StrongSubtractType: {
+ /* Evaluate the expression. */
+ rtnVal = expression->walk( pd );
+
+ /* Evaluate the term and pad it with any* machines. */
+ FsmAp *rhs = dotStarFsm( pd );
+ FsmAp *termFsm = term->walk( pd );
+ FsmAp *trailAnyStar = dotStarFsm( pd );
+ rhs->concatOp( termFsm );
+ rhs->concatOp( trailAnyStar );
+
+ /* Perform subtraction. */
+ rtnVal->subtractOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case TermType: {
+ /* Return result of the term. */
+ rtnVal = term->walk( pd );
+ break;
+ }
+ case BuiltinType: {
+ /* Duplicate the builtin. */
+ rtnVal = makeBuiltin( builtin, pd );
+ break;
+ }
+ }
+
+ return rtnVal;
+}
+
+void Expression::makeNameTree( ParseData *pd )
+{
+ switch ( type ) {
+ case OrType:
+ case IntersectType:
+ case SubtractType:
+ case StrongSubtractType:
+ expression->makeNameTree( pd );
+ term->makeNameTree( pd );
+ break;
+ case TermType:
+ term->makeNameTree( pd );
+ break;
+ case BuiltinType:
+ break;
+ }
+}
+
+void Expression::resolveNameRefs( ParseData *pd )
+{
+ switch ( type ) {
+ case OrType:
+ case IntersectType:
+ case SubtractType:
+ case StrongSubtractType:
+ expression->resolveNameRefs( pd );
+ term->resolveNameRefs( pd );
+ break;
+ case TermType:
+ term->resolveNameRefs( pd );
+ break;
+ case BuiltinType:
+ break;
+ }
+}
+
+/* Clean up after a term node. */
+Term::~Term()
+{
+ switch ( type ) {
+ case ConcatType:
+ case RightStartType:
+ case RightFinishType:
+ case LeftType:
+ delete term;
+ delete factorWithAug;
+ break;
+ case FactorWithAugType:
+ delete factorWithAug;
+ break;
+ }
+}
+
+/* Evaluate a term node. */
+FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
+{
+ FsmAp *rtnVal = 0;
+ switch ( type ) {
+ case ConcatType: {
+ /* Evaluate the Term. */
+ rtnVal = term->walk( pd, false );
+ /* Evaluate the FactorWithRep. */
+ FsmAp *rhs = factorWithAug->walk( pd );
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case RightStartType: {
+ /* Evaluate the Term. */
+ rtnVal = term->walk( pd );
+
+ /* Evaluate the FactorWithRep. */
+ FsmAp *rhs = factorWithAug->walk( pd );
+
+ /* Set up the priority descriptors. The left machine gets the
+ * lower priority where as the right get the higher start priority. */
+ priorDescs[0].key = pd->nextPriorKey++;
+ priorDescs[0].priority = 0;
+ rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+ /* The start transitions of the right machine gets the higher
+ * priority. Use the same unique key. */
+ priorDescs[1].key = priorDescs[0].key;
+ priorDescs[1].priority = 1;
+ rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case RightFinishType: {
+ /* Evaluate the Term. */
+ rtnVal = term->walk( pd );
+
+ /* Evaluate the FactorWithRep. */
+ FsmAp *rhs = factorWithAug->walk( pd );
+
+ /* Set up the priority descriptors. The left machine gets the
+ * lower priority where as the finishing transitions to the right
+ * get the higher priority. */
+ priorDescs[0].key = pd->nextPriorKey++;
+ priorDescs[0].priority = 0;
+ rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+ /* The finishing transitions of the right machine get the higher
+ * priority. Use the same unique key. */
+ priorDescs[1].key = priorDescs[0].key;
+ priorDescs[1].priority = 1;
+ rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+ /* If the right machine's start state is final we need to guard
+ * against the left machine persisting by moving through the empty
+ * string. */
+ if ( rhs->startState->isFinState() ) {
+ rhs->startState->outPriorTable.setPrior(
+ pd->curPriorOrd++, &priorDescs[1] );
+ }
+
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case LeftType: {
+ /* Evaluate the Term. */
+ rtnVal = term->walk( pd );
+
+ /* Evaluate the FactorWithRep. */
+ FsmAp *rhs = factorWithAug->walk( pd );
+
+ /* Set up the priority descriptors. The left machine gets the
+ * higher priority. */
+ priorDescs[0].key = pd->nextPriorKey++;
+ priorDescs[0].priority = 1;
+ rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+ /* The right machine gets the lower priority. We cannot use
+ * allTransPrior here in case the start state of the right machine
+ * is final. It would allow the right machine thread to run along
+ * with the left if just passing through the start state. Using
+ * startFsmPrior prevents this. */
+ priorDescs[1].key = priorDescs[0].key;
+ priorDescs[1].priority = 0;
+ rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+ /* Perform concatenation. */
+ rtnVal->concatOp( rhs );
+ afterOpMinimize( rtnVal, lastInSeq );
+ break;
+ }
+ case FactorWithAugType: {
+ rtnVal = factorWithAug->walk( pd );
+ break;
+ }
+ }
+ return rtnVal;
+}
+
+void Term::makeNameTree( ParseData *pd )
+{
+ switch ( type ) {
+ case ConcatType:
+ case RightStartType:
+ case RightFinishType:
+ case LeftType:
+ term->makeNameTree( pd );
+ factorWithAug->makeNameTree( pd );
+ break;
+ case FactorWithAugType:
+ factorWithAug->makeNameTree( pd );
+ break;
+ }
+}
+
+void Term::resolveNameRefs( ParseData *pd )
+{
+ switch ( type ) {
+ case ConcatType:
+ case RightStartType:
+ case RightFinishType:
+ case LeftType:
+ term->resolveNameRefs( pd );
+ factorWithAug->resolveNameRefs( pd );
+ break;
+ case FactorWithAugType:
+ factorWithAug->resolveNameRefs( pd );
+ break;
+ }
+}
+
+/* Clean up after a factor with augmentation node. */
+FactorWithAug::~FactorWithAug()
+{
+ delete factorWithRep;
+
+ /* Walk the vector of parser actions, deleting function names. */
+
+ /* Clean up priority descriptors. */
+ if ( priorDescs != 0 )
+ delete[] priorDescs;
+}
+
+void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
+{
+ /* Assign actions. */
+ for ( int i = 0; i < actions.length(); i++ ) {
+ switch ( actions[i].type ) {
+ /* Transition actions. */
+ case at_start:
+ graph->startFsmAction( actionOrd[i], actions[i].action );
+ afterOpMinimize( graph );
+ break;
+ case at_all:
+ graph->allTransAction( actionOrd[i], actions[i].action );
+ break;
+ case at_finish:
+ graph->finishFsmAction( actionOrd[i], actions[i].action );
+ break;
+ case at_leave:
+ graph->leaveFsmAction( actionOrd[i], actions[i].action );
+ break;
+
+ /* Global error actions. */
+ case at_start_gbl_error:
+ graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
+ afterOpMinimize( graph );
+ break;
+ case at_all_gbl_error:
+ graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
+ break;
+ case at_final_gbl_error:
+ graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
+ break;
+ case at_not_start_gbl_error:
+ graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
+ break;
+ case at_not_final_gbl_error:
+ graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
+ break;
+ case at_middle_gbl_error:
+ graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
+ break;
+
+ /* Local error actions. */
+ case at_start_local_error:
+ graph->startErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ afterOpMinimize( graph );
+ break;
+ case at_all_local_error:
+ graph->allErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ break;
+ case at_final_local_error:
+ graph->finalErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ break;
+ case at_not_start_local_error:
+ graph->notStartErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ break;
+ case at_not_final_local_error:
+ graph->notFinalErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ break;
+ case at_middle_local_error:
+ graph->middleErrorAction( actionOrd[i], actions[i].action,
+ actions[i].localErrKey );
+ break;
+
+ /* EOF actions. */
+ case at_start_eof:
+ graph->startEOFAction( actionOrd[i], actions[i].action );
+ afterOpMinimize( graph );
+ break;
+ case at_all_eof:
+ graph->allEOFAction( actionOrd[i], actions[i].action );
+ break;
+ case at_final_eof:
+ graph->finalEOFAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_start_eof:
+ graph->notStartEOFAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_final_eof:
+ graph->notFinalEOFAction( actionOrd[i], actions[i].action );
+ break;
+ case at_middle_eof:
+ graph->middleEOFAction( actionOrd[i], actions[i].action );
+ break;
+
+ /* To State Actions. */
+ case at_start_to_state:
+ graph->startToStateAction( actionOrd[i], actions[i].action );
+ afterOpMinimize( graph );
+ break;
+ case at_all_to_state:
+ graph->allToStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_final_to_state:
+ graph->finalToStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_start_to_state:
+ graph->notStartToStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_final_to_state:
+ graph->notFinalToStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_middle_to_state:
+ graph->middleToStateAction( actionOrd[i], actions[i].action );
+ break;
+
+ /* From State Actions. */
+ case at_start_from_state:
+ graph->startFromStateAction( actionOrd[i], actions[i].action );
+ afterOpMinimize( graph );
+ break;
+ case at_all_from_state:
+ graph->allFromStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_final_from_state:
+ graph->finalFromStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_start_from_state:
+ graph->notStartFromStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_not_final_from_state:
+ graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
+ break;
+ case at_middle_from_state:
+ graph->middleFromStateAction( actionOrd[i], actions[i].action );
+ break;
+
+ /* Remaining cases, prevented by the parser. */
default:
- break;
- }
- }
-}
-
-
-/* Evaluate a factor with augmentation node. */
-FsmAp *FactorWithAug::walk( ParseData *pd )
-{
- /* Enter into the scopes created for the labels. */
- NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
-
- /* Make the array of function orderings. */
- int *actionOrd = 0;
- if ( actions.length() > 0 )
- actionOrd = new int[actions.length()];
-
- /* First walk the list of actions, assigning order to all starting
- * actions. */
- for ( int i = 0; i < actions.length(); i++ ) {
- if ( actions[i].type == at_start ||
- actions[i].type == at_start_gbl_error ||
- actions[i].type == at_start_local_error ||
- actions[i].type == at_start_to_state ||
- actions[i].type == at_start_from_state ||
- actions[i].type == at_start_eof )
- actionOrd[i] = pd->curActionOrd++;
- }
-
- /* Evaluate the factor with repetition. */
- FsmAp *rtnVal = factorWithRep->walk( pd );
-
- /* Compute the remaining action orderings. */
- for ( int i = 0; i < actions.length(); i++ ) {
- if ( actions[i].type != at_start &&
- actions[i].type != at_start_gbl_error &&
- actions[i].type != at_start_local_error &&
- actions[i].type != at_start_to_state &&
- actions[i].type != at_start_from_state &&
- actions[i].type != at_start_eof )
- actionOrd[i] = pd->curActionOrd++;
- }
-
- /* Embed conditions. */
- assignConditions( rtnVal );
-
- /* Embed actions. */
- assignActions( pd, rtnVal , actionOrd );
-
- /* Make the array of priority orderings. Orderings are local to this walk
- * of the factor with augmentation. */
- int *priorOrd = 0;
- if ( priorityAugs.length() > 0 )
- priorOrd = new int[priorityAugs.length()];
-
- /* Walk all priorities, assigning the priority ordering. */
- for ( int i = 0; i < priorityAugs.length(); i++ )
- priorOrd[i] = pd->curPriorOrd++;
-
- /* If the priority descriptors have not been made, make them now. Make
- * priority descriptors for each priority asignment that will be passed to
- * the fsm. Used to keep track of the key, value and used bit. */
- if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
- priorDescs = new PriorDesc[priorityAugs.length()];
- for ( int i = 0; i < priorityAugs.length(); i++ ) {
- /* Init the prior descriptor for the priority setting. */
- priorDescs[i].key = priorityAugs[i].priorKey;
- priorDescs[i].priority = priorityAugs[i].priorValue;
- }
- }
-
- /* Assign priorities into the machine. */
- assignPriorities( rtnVal, priorOrd );
-
- /* Assign epsilon transitions. */
- for ( int e = 0; e < epsilonLinks.length(); e++ ) {
- /* Get the name, which may not exist. If it doesn't then silently
- * ignore it because an error has already been reported. */
- NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
- if ( epTarg != 0 ) {
- /* Make the epsilon transitions. */
- rtnVal->epsilonTrans( epTarg->id );
-
- /* Note that we have made a link to the name. */
- pd->localNameScope->referencedNames.append( epTarg );
- }
- }
-
- /* Set entry points for labels. */
- if ( labels.length() > 0 ) {
- /* Pop the names. */
- pd->resetNameScope( nameFrame );
-
- /* Make labels that are referenced into entry points. */
- for ( int i = 0; i < labels.length(); i++ ) {
- pd->enterNameScope( false, 1 );
-
- /* Will always be found. */
- NameInst *name = pd->curNameInst;
-
- /* If the name is referenced then set the entry point. */
- if ( name->numRefs > 0 )
- rtnVal->setEntry( name->id, rtnVal->startState );
- }
-
- pd->popNameScope( nameFrame );
- }
-
- if ( priorOrd != 0 )
- delete[] priorOrd;
- if ( actionOrd != 0 )
- delete[] actionOrd;
- return rtnVal;
-}
-
-void FactorWithAug::makeNameTree( ParseData *pd )
-{
- /* Add the labels to the tree of instantiated names. Each label
- * makes a new scope. */
- NameInst *prevNameInst = pd->curNameInst;
- for ( int i = 0; i < labels.length(); i++ )
- pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
-
- /* Recurse, then pop the names. */
- factorWithRep->makeNameTree( pd );
- pd->curNameInst = prevNameInst;
-}
-
-
-void FactorWithAug::resolveNameRefs( ParseData *pd )
-{
- /* Enter into the name scope created by any labels. */
- NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
-
- /* Note action references. */
- for ( int i = 0; i < actions.length(); i++ )
- actions[i].action->actionRefs.append( pd->localNameScope );
-
- /* Recurse first. IMPORTANT: we must do the exact same traversal as when
- * the tree is constructed. */
- factorWithRep->resolveNameRefs( pd );
-
- /* Resolve epsilon transitions. */
- for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
- /* Get the link. */
- EpsilonLink &link = epsilonLinks[ep];
- NameInst *resolvedName = 0;
-
- if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
- /* Epsilon drawn to an implicit final state. An implicit final is
- * only available in join operations. */
- resolvedName = pd->localNameScope->final;
- }
- else {
- /* Do an search for the name. */
- NameSet resolved;
- pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
- if ( resolved.length() > 0 ) {
- /* Take the first one. */
- resolvedName = resolved[0];
- if ( resolved.length() > 1 ) {
- /* Complain about the multiple references. */
- error(link.loc) << "state reference " << link.target <<
- " resolves to multiple entry points" << endl;
- errorStateLabels( resolved );
- }
- }
- }
-
- /* This is tricky, we stuff resolved epsilon transitions into one long
- * vector in the parse data structure. Since the name resolution and
- * graph generation both do identical walks of the parse tree we
- * should always find the link resolutions in the right place. */
- pd->epsilonResolvedLinks.append( resolvedName );
-
- if ( resolvedName != 0 ) {
- /* Found the name, bump of the reference count on it. */
- resolvedName->numRefs += 1;
- }
- else {
- /* Complain, no recovery action, the epsilon op will ignore any
- * epsilon transitions whose names did not resolve. */
- error(link.loc) << "could not resolve label " << link.target << endl;
- }
- }
-
- if ( labels.length() > 0 )
- pd->popNameScope( nameFrame );
-}
-
-
-/* Clean up after a factor with repetition node. */
-FactorWithRep::~FactorWithRep()
-{
- switch ( type ) {
- case StarType: case StarStarType: case OptionalType: case PlusType:
- case ExactType: case MaxType: case MinType: case RangeType:
- delete factorWithRep;
- break;
- case FactorWithNegType:
- delete factorWithNeg;
- break;
- }
-}
-
-/* Evaluate a factor with repetition node. */
-FsmAp *FactorWithRep::walk( ParseData *pd )
-{
- FsmAp *retFsm = 0;
-
- switch ( type ) {
- case StarType: {
- /* Evaluate the FactorWithRep. */
- retFsm = factorWithRep->walk( pd );
- if ( retFsm->startState->isFinState() ) {
- warning(loc) << "applying kleene star to a machine that "
- "accepts zero length word" << endl;
- retFsm->unsetFinState( retFsm->startState );
- }
-
- /* Shift over the start action orders then do the kleene star. */
- pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
- retFsm->starOp( );
- afterOpMinimize( retFsm );
- break;
- }
- case StarStarType: {
- /* Evaluate the FactorWithRep. */
- retFsm = factorWithRep->walk( pd );
- if ( retFsm->startState->isFinState() ) {
- warning(loc) << "applying kleene star to a machine that "
- "accepts zero length word" << endl;
- }
-
- /* Set up the prior descs. All gets priority one, whereas leaving gets
- * priority zero. Make a unique key so that these priorities don't
- * interfere with any priorities set by the user. */
- priorDescs[0].key = pd->nextPriorKey++;
- priorDescs[0].priority = 1;
- retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
-
- /* Leaveing gets priority 0. Use same unique key. */
- priorDescs[1].key = priorDescs[0].key;
- priorDescs[1].priority = 0;
- retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
-
- /* Shift over the start action orders then do the kleene star. */
- pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
- retFsm->starOp( );
- afterOpMinimize( retFsm );
- break;
- }
- case OptionalType: {
- /* Make the null fsm. */
- FsmAp *nu = new FsmAp();
- nu->lambdaFsm( );
-
- /* Evaluate the FactorWithRep. */
- retFsm = factorWithRep->walk( pd );
-
- /* Perform the question operator. */
- retFsm->unionOp( nu );
- afterOpMinimize( retFsm );
- break;
- }
- case PlusType: {
- /* Evaluate the FactorWithRep. */
- retFsm = factorWithRep->walk( pd );
- if ( retFsm->startState->isFinState() ) {
- warning(loc) << "applying plus operator to a machine that "
- "accepts zero length word" << endl;
- }
-
- /* Need a duplicated for the star end. */
- FsmAp *dup = new FsmAp( *retFsm );
-
- /* The start func orders need to be shifted before doing the star. */
- pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
-
- /* Star the duplicate. */
- dup->starOp( );
- afterOpMinimize( dup );
-
- retFsm->concatOp( dup );
- afterOpMinimize( retFsm );
- break;
- }
- case ExactType: {
- /* Get an int from the repetition amount. */
- if ( lowerRep == 0 ) {
- /* No copies. Don't need to evaluate the factorWithRep.
- * This Defeats the purpose so give a warning. */
- warning(loc) << "exactly zero repetitions results "
- "in the null machine" << endl;
-
- retFsm = new FsmAp();
- retFsm->lambdaFsm();
- }
- else {
- /* Evaluate the first FactorWithRep. */
- retFsm = factorWithRep->walk( pd );
- if ( retFsm->startState->isFinState() ) {
- warning(loc) << "applying repetition to a machine that "
- "accepts zero length word" << endl;
- }
-
+ assert( false );
+ break;
+ }
+ }
+}
+
+void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
+{
+ /* Assign priorities. */
+ for ( int i = 0; i < priorityAugs.length(); i++ ) {
+ switch ( priorityAugs[i].type ) {
+ case at_start:
+ graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
+ /* Start fsm priorities are a special case that may require
+ * minimization afterwards. */
+ afterOpMinimize( graph );
+ break;
+ case at_all:
+ graph->allTransPrior( priorOrd[i], &priorDescs[i] );
+ break;
+ case at_finish:
+ graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
+ break;
+ case at_leave:
+ graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
+ break;
+
+ default:
+ /* Parser Prevents this case. */
+ break;
+ }
+ }
+}
+
+void FactorWithAug::assignConditions( FsmAp *graph )
+{
+ for ( int i = 0; i < conditions.length(); i++ ) {
+ switch ( conditions[i].type ) {
+ /* Transition actions. */
+ case at_start:
+ graph->startFsmCondition( conditions[i].action, conditions[i].sense );
+ afterOpMinimize( graph );
+ break;
+ case at_all:
+ graph->allTransCondition( conditions[i].action, conditions[i].sense );
+ break;
+ case at_leave:
+ graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+
+/* Evaluate a factor with augmentation node. */
+FsmAp *FactorWithAug::walk( ParseData *pd )
+{
+ /* Enter into the scopes created for the labels. */
+ NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
+
+ /* Make the array of function orderings. */
+ int *actionOrd = 0;
+ if ( actions.length() > 0 )
+ actionOrd = new int[actions.length()];
+
+ /* First walk the list of actions, assigning order to all starting
+ * actions. */
+ for ( int i = 0; i < actions.length(); i++ ) {
+ if ( actions[i].type == at_start ||
+ actions[i].type == at_start_gbl_error ||
+ actions[i].type == at_start_local_error ||
+ actions[i].type == at_start_to_state ||
+ actions[i].type == at_start_from_state ||
+ actions[i].type == at_start_eof )
+ actionOrd[i] = pd->curActionOrd++;
+ }
+
+ /* Evaluate the factor with repetition. */
+ FsmAp *rtnVal = factorWithRep->walk( pd );
+
+ /* Compute the remaining action orderings. */
+ for ( int i = 0; i < actions.length(); i++ ) {
+ if ( actions[i].type != at_start &&
+ actions[i].type != at_start_gbl_error &&
+ actions[i].type != at_start_local_error &&
+ actions[i].type != at_start_to_state &&
+ actions[i].type != at_start_from_state &&
+ actions[i].type != at_start_eof )
+ actionOrd[i] = pd->curActionOrd++;
+ }
+
+ /* Embed conditions. */
+ assignConditions( rtnVal );
+
+ /* Embed actions. */
+ assignActions( pd, rtnVal , actionOrd );
+
+ /* Make the array of priority orderings. Orderings are local to this walk
+ * of the factor with augmentation. */
+ int *priorOrd = 0;
+ if ( priorityAugs.length() > 0 )
+ priorOrd = new int[priorityAugs.length()];
+
+ /* Walk all priorities, assigning the priority ordering. */
+ for ( int i = 0; i < priorityAugs.length(); i++ )
+ priorOrd[i] = pd->curPriorOrd++;
+
+ /* If the priority descriptors have not been made, make them now. Make
+ * priority descriptors for each priority asignment that will be passed to
+ * the fsm. Used to keep track of the key, value and used bit. */
+ if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
+ priorDescs = new PriorDesc[priorityAugs.length()];
+ for ( int i = 0; i < priorityAugs.length(); i++ ) {
+ /* Init the prior descriptor for the priority setting. */
+ priorDescs[i].key = priorityAugs[i].priorKey;
+ priorDescs[i].priority = priorityAugs[i].priorValue;
+ }
+ }
+
+ /* Assign priorities into the machine. */
+ assignPriorities( rtnVal, priorOrd );
+
+ /* Assign epsilon transitions. */
+ for ( int e = 0; e < epsilonLinks.length(); e++ ) {
+ /* Get the name, which may not exist. If it doesn't then silently
+ * ignore it because an error has already been reported. */
+ NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
+ if ( epTarg != 0 ) {
+ /* Make the epsilon transitions. */
+ rtnVal->epsilonTrans( epTarg->id );
+
+ /* Note that we have made a link to the name. */
+ pd->localNameScope->referencedNames.append( epTarg );
+ }
+ }
+
+ /* Set entry points for labels. */
+ if ( labels.length() > 0 ) {
+ /* Pop the names. */
+ pd->resetNameScope( nameFrame );
+
+ /* Make labels that are referenced into entry points. */
+ for ( int i = 0; i < labels.length(); i++ ) {
+ pd->enterNameScope( false, 1 );
+
+ /* Will always be found. */
+ NameInst *name = pd->curNameInst;
+
+ /* If the name is referenced then set the entry point. */
+ if ( name->numRefs > 0 )
+ rtnVal->setEntry( name->id, rtnVal->startState );
+ }
+
+ pd->popNameScope( nameFrame );
+ }
+
+ if ( priorOrd != 0 )
+ delete[] priorOrd;
+ if ( actionOrd != 0 )
+ delete[] actionOrd;
+ return rtnVal;
+}
+
+void FactorWithAug::makeNameTree( ParseData *pd )
+{
+ /* Add the labels to the tree of instantiated names. Each label
+ * makes a new scope. */
+ NameInst *prevNameInst = pd->curNameInst;
+ for ( int i = 0; i < labels.length(); i++ )
+ pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
+
+ /* Recurse, then pop the names. */
+ factorWithRep->makeNameTree( pd );
+ pd->curNameInst = prevNameInst;
+}
+
+
+void FactorWithAug::resolveNameRefs( ParseData *pd )
+{
+ /* Enter into the name scope created by any labels. */
+ NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
+
+ /* Note action references. */
+ for ( int i = 0; i < actions.length(); i++ )
+ actions[i].action->actionRefs.append( pd->localNameScope );
+
+ /* Recurse first. IMPORTANT: we must do the exact same traversal as when
+ * the tree is constructed. */
+ factorWithRep->resolveNameRefs( pd );
+
+ /* Resolve epsilon transitions. */
+ for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
+ /* Get the link. */
+ EpsilonLink &link = epsilonLinks[ep];
+ NameInst *resolvedName = 0;
+
+ if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
+ /* Epsilon drawn to an implicit final state. An implicit final is
+ * only available in join operations. */
+ resolvedName = pd->localNameScope->final;
+ }
+ else {
+ /* Do an search for the name. */
+ NameSet resolved;
+ pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
+ if ( resolved.length() > 0 ) {
+ /* Take the first one. */
+ resolvedName = resolved[0];
+ if ( resolved.length() > 1 ) {
+ /* Complain about the multiple references. */
+ error(link.loc) << "state reference " << link.target <<
+ " resolves to multiple entry points" << endl;
+ errorStateLabels( resolved );
+ }
+ }
+ }
+
+ /* This is tricky, we stuff resolved epsilon transitions into one long
+ * vector in the parse data structure. Since the name resolution and
+ * graph generation both do identical walks of the parse tree we
+ * should always find the link resolutions in the right place. */
+ pd->epsilonResolvedLinks.append( resolvedName );
+
+ if ( resolvedName != 0 ) {
+ /* Found the name, bump of the reference count on it. */
+ resolvedName->numRefs += 1;
+ }
+ else {
+ /* Complain, no recovery action, the epsilon op will ignore any
+ * epsilon transitions whose names did not resolve. */
+ error(link.loc) << "could not resolve label " << link.target << endl;
+ }
+ }
+
+ if ( labels.length() > 0 )
+ pd->popNameScope( nameFrame );
+}
+
+
+/* Clean up after a factor with repetition node. */
+FactorWithRep::~FactorWithRep()
+{
+ switch ( type ) {
+ case StarType: case StarStarType: case OptionalType: case PlusType:
+ case ExactType: case MaxType: case MinType: case RangeType:
+ delete factorWithRep;
+ break;
+ case FactorWithNegType:
+ delete factorWithNeg;
+ break;
+ }
+}
+
+/* Evaluate a factor with repetition node. */
+FsmAp *FactorWithRep::walk( ParseData *pd )
+{
+ FsmAp *retFsm = 0;
+
+ switch ( type ) {
+ case StarType: {
+ /* Evaluate the FactorWithRep. */
+ retFsm = factorWithRep->walk( pd );
+ if ( retFsm->startState->isFinState() ) {
+ warning(loc) << "applying kleene star to a machine that "
+ "accepts zero length word" << endl;
+ retFsm->unsetFinState( retFsm->startState );
+ }
+
+ /* Shift over the start action orders then do the kleene star. */
+ pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+ retFsm->starOp( );
+ afterOpMinimize( retFsm );
+ break;
+ }
+ case StarStarType: {
+ /* Evaluate the FactorWithRep. */
+ retFsm = factorWithRep->walk( pd );
+ if ( retFsm->startState->isFinState() ) {
+ warning(loc) << "applying kleene star to a machine that "
+ "accepts zero length word" << endl;
+ }
+
+ /* Set up the prior descs. All gets priority one, whereas leaving gets
+ * priority zero. Make a unique key so that these priorities don't
+ * interfere with any priorities set by the user. */
+ priorDescs[0].key = pd->nextPriorKey++;
+ priorDescs[0].priority = 1;
+ retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
+
+ /* Leaveing gets priority 0. Use same unique key. */
+ priorDescs[1].key = priorDescs[0].key;
+ priorDescs[1].priority = 0;
+ retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
+
+ /* Shift over the start action orders then do the kleene star. */
+ pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+ retFsm->starOp( );
+ afterOpMinimize( retFsm );
+ break;
+ }
+ case OptionalType: {
+ /* Make the null fsm. */
+ FsmAp *nu = new FsmAp();
+ nu->lambdaFsm( );
+
+ /* Evaluate the FactorWithRep. */
+ retFsm = factorWithRep->walk( pd );
+
+ /* Perform the question operator. */
+ retFsm->unionOp( nu );
+ afterOpMinimize( retFsm );
+ break;
+ }
+ case PlusType: {
+ /* Evaluate the FactorWithRep. */
+ retFsm = factorWithRep->walk( pd );
+ if ( retFsm->startState->isFinState() ) {
+ warning(loc) << "applying plus operator to a machine that "
+ "accepts zero length word" << endl;
+ }
+
+ /* Need a duplicated for the star end. */
+ FsmAp *dup = new FsmAp( *retFsm );
+
+ /* The start func orders need to be shifted before doing the star. */
+ pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
+
+ /* Star the duplicate. */
+ dup->starOp( );
+ afterOpMinimize( dup );
+
+ retFsm->concatOp( dup );
+ afterOpMinimize( retFsm );
+ break;
+ }
+ case ExactType: {
+ /* Get an int from the repetition amount. */
+ if ( lowerRep == 0 ) {
+ /* No copies. Don't need to evaluate the factorWithRep.
+ * This Defeats the purpose so give a warning. */
+ warning(loc) << "exactly zero repetitions results "
+ "in the null machine" << endl;
+
+ retFsm = new FsmAp();
+ retFsm->lambdaFsm();
+ }
+ else {
+ /* Evaluate the first FactorWithRep. */
+ retFsm = factorWithRep->walk( pd );
+ if ( retFsm->startState->isFinState() ) {
+ warning(loc) << "applying repetition to a machine that "
+ "accepts zero length word" << endl;
+ }
+
+ /* The start func orders need to be shifted before doing the
+ * repetition. */
+ pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+ /* Do the repetition on the machine. Already guarded against n == 0 */
+ retFsm->repeatOp( lowerRep );
+ afterOpMinimize( retFsm );
+ }
+ break;
+ }
+ case MaxType: {
+ /* Get an int from the repetition amount. */
+ if ( upperRep == 0 ) {
+ /* No copies. Don't need to evaluate the factorWithRep.
+ * This Defeats the purpose so give a warning. */
+ warning(loc) << "max zero repetitions results "
+ "in the null machine" << endl;
+
+ retFsm = new FsmAp();
+ retFsm->lambdaFsm();
+ }
+ else {
+ /* Evaluate the first FactorWithRep. */
+ retFsm = factorWithRep->walk( pd );
+ if ( retFsm->startState->isFinState() ) {
+ warning(loc) << "applying max repetition to a machine that "
+ "accepts zero length word" << endl;
+ }
+
/* The start func orders need to be shifted before doing the
- * repetition. */
- pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
-
- /* Do the repetition on the machine. Already guarded against n == 0 */
- retFsm->repeatOp( lowerRep );
- afterOpMinimize( retFsm );
- }
- break;
- }
- case MaxType: {
- /* Get an int from the repetition amount. */
- if ( upperRep == 0 ) {
- /* No copies. Don't need to evaluate the factorWithRep.
- * This Defeats the purpose so give a warning. */
- warning(loc) << "max zero repetitions results "
- "in the null machine" << endl;
-
- retFsm = new FsmAp();
- retFsm->lambdaFsm();
- }
- else {
- /* Evaluate the first FactorWithRep. */
- retFsm = factorWithRep->walk( pd );
- if ( retFsm->startState->isFinState() ) {
- warning(loc) << "applying max repetition to a machine that "
- "accepts zero length word" << endl;
- }
-
- /* The start func orders need to be shifted before doing the
- * repetition. */
- pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
-
- /* Do the repetition on the machine. Already guarded against n == 0 */
- retFsm->optionalRepeatOp( upperRep );
- afterOpMinimize( retFsm );
- }
- break;
- }
- case MinType: {
- /* Evaluate the repeated machine. */
- retFsm = factorWithRep->walk( pd );
- if ( retFsm->startState->isFinState() ) {
- warning(loc) << "applying min repetition to a machine that "
- "accepts zero length word" << endl;
- }
-
- /* The start func orders need to be shifted before doing the repetition
- * and the kleene star. */
- pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
-
- if ( lowerRep == 0 ) {
- /* Acts just like a star op on the machine to return. */
- retFsm->starOp( );
- afterOpMinimize( retFsm );
- }
- else {
- /* Take a duplicate for the plus. */
- FsmAp *dup = new FsmAp( *retFsm );
-
- /* Do repetition on the first half. */
- retFsm->repeatOp( lowerRep );
- afterOpMinimize( retFsm );
-
- /* Star the duplicate. */
- dup->starOp( );
- afterOpMinimize( dup );
-
- /* Tak on the kleene star. */
- retFsm->concatOp( dup );
- afterOpMinimize( retFsm );
- }
- break;
- }
- case RangeType: {
- /* Check for bogus range. */
- if ( upperRep - lowerRep < 0 ) {
- error(loc) << "invalid range repetition" << endl;
-
- /* Return null machine as recovery. */
- retFsm = new FsmAp();
- retFsm->lambdaFsm();
- }
- else if ( lowerRep == 0 && upperRep == 0 ) {
- /* No copies. Don't need to evaluate the factorWithRep. This
- * defeats the purpose so give a warning. */
- warning(loc) << "zero to zero repetitions results "
- "in the null machine" << endl;
-
- retFsm = new FsmAp();
- retFsm->lambdaFsm();
- }
- else {
- /* Now need to evaluate the repeated machine. */
- retFsm = factorWithRep->walk( pd );
- if ( retFsm->startState->isFinState() ) {
- warning(loc) << "applying range repetition to a machine that "
- "accepts zero length word" << endl;
- }
-
- /* The start func orders need to be shifted before doing both kinds
- * of repetition. */
- pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
-
- if ( lowerRep == 0 ) {
- /* Just doing max repetition. Already guarded against n == 0. */
- retFsm->optionalRepeatOp( upperRep );
- afterOpMinimize( retFsm );
- }
- else if ( lowerRep == upperRep ) {
- /* Just doing exact repetition. Already guarded against n == 0. */
- retFsm->repeatOp( lowerRep );
- afterOpMinimize( retFsm );
- }
- else {
- /* This is the case that 0 < lowerRep < upperRep. Take a
- * duplicate for the optional repeat. */
- FsmAp *dup = new FsmAp( *retFsm );
-
- /* Do repetition on the first half. */
- retFsm->repeatOp( lowerRep );
- afterOpMinimize( retFsm );
-
- /* Do optional repetition on the second half. */
- dup->optionalRepeatOp( upperRep - lowerRep );
- afterOpMinimize( dup );
-
- /* Tak on the duplicate machine. */
- retFsm->concatOp( dup );
- afterOpMinimize( retFsm );
- }
- }
- break;
- }
- case FactorWithNegType: {
- /* Evaluate the Factor. Pass it up. */
- retFsm = factorWithNeg->walk( pd );
- break;
- }}
- return retFsm;
-}
-
-void FactorWithRep::makeNameTree( ParseData *pd )
-{
- switch ( type ) {
- case StarType:
- case StarStarType:
- case OptionalType:
- case PlusType:
- case ExactType:
- case MaxType:
- case MinType:
- case RangeType:
- factorWithRep->makeNameTree( pd );
- break;
- case FactorWithNegType:
- factorWithNeg->makeNameTree( pd );
- break;
- }
-}
-
-void FactorWithRep::resolveNameRefs( ParseData *pd )
-{
- switch ( type ) {
- case StarType:
- case StarStarType:
- case OptionalType:
- case PlusType:
- case ExactType:
- case MaxType:
- case MinType:
- case RangeType:
- factorWithRep->resolveNameRefs( pd );
- break;
- case FactorWithNegType:
- factorWithNeg->resolveNameRefs( pd );
- break;
- }
-}
-
-/* Clean up after a factor with negation node. */
-FactorWithNeg::~FactorWithNeg()
-{
- switch ( type ) {
- case NegateType:
- case CharNegateType:
- delete factorWithNeg;
- break;
- case FactorType:
- delete factor;
- break;
- }
-}
-
-/* Evaluate a factor with negation node. */
-FsmAp *FactorWithNeg::walk( ParseData *pd )
-{
- FsmAp *retFsm = 0;
-
- switch ( type ) {
- case NegateType: {
- /* Evaluate the factorWithNeg. */
- FsmAp *toNegate = factorWithNeg->walk( pd );
-
- /* Negation is subtract from dot-star. */
- retFsm = dotStarFsm( pd );
- retFsm->subtractOp( toNegate );
- afterOpMinimize( retFsm );
- break;
- }
- case CharNegateType: {
- /* Evaluate the factorWithNeg. */
- FsmAp *toNegate = factorWithNeg->walk( pd );
-
- /* CharNegation is subtract from dot. */
- retFsm = dotFsm( pd );
- retFsm->subtractOp( toNegate );
- afterOpMinimize( retFsm );
- break;
- }
- case FactorType: {
- /* Evaluate the Factor. Pass it up. */
- retFsm = factor->walk( pd );
- break;
- }}
- return retFsm;
-}
-
-void FactorWithNeg::makeNameTree( ParseData *pd )
-{
- switch ( type ) {
- case NegateType:
- case CharNegateType:
- factorWithNeg->makeNameTree( pd );
- break;
- case FactorType:
- factor->makeNameTree( pd );
- break;
- }
-}
-
-void FactorWithNeg::resolveNameRefs( ParseData *pd )
-{
- switch ( type ) {
- case NegateType:
- case CharNegateType:
- factorWithNeg->resolveNameRefs( pd );
- break;
- case FactorType:
- factor->resolveNameRefs( pd );
- break;
- }
-}
-
-/* Clean up after a factor node. */
-Factor::~Factor()
-{
- switch ( type ) {
- case LiteralType:
- delete literal;
- break;
- case RangeType:
- delete range;
- break;
- case OrExprType:
- delete reItem;
- break;
- case RegExprType:
- delete regExpr;
- break;
- case ReferenceType:
- break;
- case ParenType:
- delete join;
- break;
- case LongestMatchType:
- delete longestMatch;
- break;
- }
-}
-
-/* Evaluate a factor node. */
-FsmAp *Factor::walk( ParseData *pd )
-{
- FsmAp *rtnVal = 0;
- switch ( type ) {
- case LiteralType:
- rtnVal = literal->walk( pd );
- break;
- case RangeType:
- rtnVal = range->walk( pd );
- break;
- case OrExprType:
- rtnVal = reItem->walk( pd, 0 );
- break;
- case RegExprType:
- rtnVal = regExpr->walk( pd, 0 );
- break;
- case ReferenceType:
- rtnVal = varDef->walk( pd );
- break;
- case ParenType:
- rtnVal = join->walk( pd );
- break;
- case LongestMatchType:
- rtnVal = longestMatch->walk( pd );
- break;
- }
-
- return rtnVal;
-}
-
-void Factor::makeNameTree( ParseData *pd )
-{
- switch ( type ) {
- case LiteralType:
- case RangeType:
- case OrExprType:
- case RegExprType:
- break;
- case ReferenceType:
- varDef->makeNameTree( loc, pd );
- break;
- case ParenType:
- join->makeNameTree( pd );
- break;
- case LongestMatchType:
- longestMatch->makeNameTree( pd );
- break;
- }
-}
-
-void Factor::resolveNameRefs( ParseData *pd )
-{
- switch ( type ) {
- case LiteralType:
- case RangeType:
- case OrExprType:
- case RegExprType:
- break;
- case ReferenceType:
- varDef->resolveNameRefs( pd );
- break;
- case ParenType:
- join->resolveNameRefs( pd );
- break;
- case LongestMatchType:
- longestMatch->resolveNameRefs( pd );
- break;
- }
-}
-
-/* Clean up a range object. Must delete the two literals. */
-Range::~Range()
-{
- delete lowerLit;
- delete upperLit;
-}
-
-/* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
-FsmAp *Range::walk( ParseData *pd )
-{
- /* Construct and verify the suitability of the lower end of the range. */
- FsmAp *lowerFsm = lowerLit->walk( pd );
- if ( !lowerFsm->checkSingleCharMachine() ) {
- error(lowerLit->token.loc) <<
- "bad range lower end, must be a single character" << endl;
- }
-
- /* Construct and verify the upper end. */
- FsmAp *upperFsm = upperLit->walk( pd );
- if ( !upperFsm->checkSingleCharMachine() ) {
- error(upperLit->token.loc) <<
- "bad range upper end, must be a single character" << endl;
- }
-
- /* Grab the keys from the machines, then delete them. */
- Key lowKey = lowerFsm->startState->outList.head->lowKey;
- Key highKey = upperFsm->startState->outList.head->lowKey;
- delete lowerFsm;
- delete upperFsm;
-
- /* Validate the range. */
- if ( lowKey > highKey ) {
- /* Recover by setting upper to lower; */
- error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
- highKey = lowKey;
- }
-
- /* Return the range now that it is validated. */
- FsmAp *retFsm = new FsmAp();
- retFsm->rangeFsm( lowKey, highKey );
- return retFsm;
-}
-
-/* Evaluate a literal object. */
-FsmAp *Literal::walk( ParseData *pd )
-{
- /* FsmAp to return, is the alphabet signed. */
- FsmAp *rtnVal = 0;
-
- switch ( type ) {
- case Number: {
- /* Make the fsm key in int format. */
- Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
- /* Make the new machine. */
- rtnVal = new FsmAp();
- rtnVal->concatFsm( fsmKey );
- break;
- }
- case LitString: {
- /* Make the array of keys in int format. */
- long length;
- bool caseInsensitive;
- char *data = prepareLitString( token.loc, token.data, token.length,
- length, caseInsensitive );
- Key *arr = new Key[length];
- makeFsmKeyArray( arr, data, length, pd );
-
- /* Make the new machine. */
- rtnVal = new FsmAp();
- if ( caseInsensitive )
- rtnVal->concatFsmCI( arr, length );
- else
- rtnVal->concatFsm( arr, length );
- delete[] data;
- delete[] arr;
- break;
- }}
- return rtnVal;
-}
-
-/* Clean up after a regular expression object. */
-RegExpr::~RegExpr()
-{
- switch ( type ) {
- case RecurseItem:
- delete regExpr;
- delete item;
- break;
- case Empty:
- break;
- }
-}
-
-/* Evaluate a regular expression object. */
-FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
-{
- /* This is the root regex, pass down a pointer to this. */
- if ( rootRegex == 0 )
- rootRegex = this;
-
- FsmAp *rtnVal = 0;
- switch ( type ) {
- case RecurseItem: {
- /* Walk both items. */
- rtnVal = regExpr->walk( pd, rootRegex );
- FsmAp *fsm2 = item->walk( pd, rootRegex );
- rtnVal->concatOp( fsm2 );
- break;
- }
- case Empty: {
- rtnVal = new FsmAp();
- rtnVal->lambdaFsm();
- break;
- }
- }
- return rtnVal;
-}
-
-/* Clean up after an item in a regular expression. */
-ReItem::~ReItem()
-{
- switch ( type ) {
- case Data:
- case Dot:
- break;
- case OrBlock:
- case NegOrBlock:
- delete orBlock;
- break;
- }
-}
-
-/* Evaluate a regular expression object. */
-FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
-{
- /* The fsm to return, is the alphabet signed? */
- FsmAp *rtnVal = 0;
-
- switch ( type ) {
- case Data: {
- /* Move the data into an integer array and make a concat fsm. */
- Key *arr = new Key[token.length];
- makeFsmKeyArray( arr, token.data, token.length, pd );
-
- /* Make the concat fsm. */
- rtnVal = new FsmAp();
- if ( rootRegex != 0 && rootRegex->caseInsensitive )
- rtnVal->concatFsmCI( arr, token.length );
- else
- rtnVal->concatFsm( arr, token.length );
- delete[] arr;
- break;
- }
- case Dot: {
- /* Make the dot fsm. */
- rtnVal = dotFsm( pd );
- break;
- }
- case OrBlock: {
- /* Get the or block and minmize it. */
- rtnVal = orBlock->walk( pd, rootRegex );
- if ( rtnVal == 0 ) {
- rtnVal = new FsmAp();
- rtnVal->lambdaFsm();
- }
- rtnVal->minimizePartition2();
- break;
- }
- case NegOrBlock: {
- /* Get the or block and minimize it. */
- FsmAp *fsm = orBlock->walk( pd, rootRegex );
- fsm->minimizePartition2();
-
- /* Make a dot fsm and subtract from it. */
- rtnVal = dotFsm( pd );
- rtnVal->subtractOp( fsm );
- rtnVal->minimizePartition2();
- break;
- }
- }
-
- /* If the item is followed by a star, then apply the star op. */
- if ( star ) {
- if ( rtnVal->startState->isFinState() ) {
- warning(loc) << "applying kleene star to a machine that "
- "accepts zero length word" << endl;
- }
-
- rtnVal->starOp();
- rtnVal->minimizePartition2();
- }
- return rtnVal;
-}
-
-/* Clean up after an or block of a regular expression. */
-ReOrBlock::~ReOrBlock()
-{
- switch ( type ) {
- case RecurseItem:
- delete orBlock;
- delete item;
- break;
- case Empty:
- break;
- }
-}
-
-
-/* Evaluate an or block of a regular expression. */
-FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
-{
- FsmAp *rtnVal = 0;
- switch ( type ) {
- case RecurseItem: {
- /* Evaluate the two fsm. */
- FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
- FsmAp *fsm2 = item->walk( pd, rootRegex );
- if ( fsm1 == 0 )
- rtnVal = fsm2;
- else {
- fsm1->unionOp( fsm2 );
- rtnVal = fsm1;
- }
- break;
- }
- case Empty: {
- rtnVal = 0;
- break;
- }
- }
- return rtnVal;;
-}
-
-/* Evaluate an or block item of a regular expression. */
-FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
-{
- /* The return value, is the alphabet signed? */
- FsmAp *rtnVal = 0;
- switch ( type ) {
- case Data: {
- /* Make the or machine. */
- rtnVal = new FsmAp();
-
- /* Put the or data into an array of ints. Note that we find unique
- * keys. Duplicates are silently ignored. The alternative would be to
- * issue warning or an error but since we can't with [a0-9a] or 'a' |
- * 'a' don't bother here. */
- KeySet keySet;
- makeFsmUniqueKeyArray( keySet, token.data, token.length,
- rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
-
- /* Run the or operator. */
- rtnVal->orFsm( keySet.data, keySet.length() );
- break;
- }
- case Range: {
- /* Make the upper and lower keys. */
- Key lowKey = makeFsmKeyChar( lower, pd );
- Key highKey = makeFsmKeyChar( upper, pd );
-
- /* Validate the range. */
- if ( lowKey > highKey ) {
- /* Recover by setting upper to lower; */
- error(loc) << "lower end of range is greater then upper end" << endl;
- highKey = lowKey;
- }
-
- /* Make the range machine. */
- rtnVal = new FsmAp();
- rtnVal->rangeFsm( lowKey, highKey );
-
- if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
- if ( lowKey <= 'Z' && 'A' <= highKey ) {
- Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
- Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
-
- otherLow = 'a' + ( otherLow - 'A' );
- otherHigh = 'a' + ( otherHigh - 'A' );
-
- FsmAp *otherRange = new FsmAp();
- otherRange->rangeFsm( otherLow, otherHigh );
- rtnVal->unionOp( otherRange );
- rtnVal->minimizePartition2();
- }
- else if ( lowKey <= 'z' && 'a' <= highKey ) {
- Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
- Key otherHigh = 'z' < highKey ? Key('z') : highKey;
-
- otherLow = 'A' + ( otherLow - 'a' );
- otherHigh = 'A' + ( otherHigh - 'a' );
-
- FsmAp *otherRange = new FsmAp();
- otherRange->rangeFsm( otherLow, otherHigh );
- rtnVal->unionOp( otherRange );
- rtnVal->minimizePartition2();
- }
- }
-
- break;
- }}
- return rtnVal;
-}
+ * repetition. */
+ pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+ /* Do the repetition on the machine. Already guarded against n == 0 */
+ retFsm->optionalRepeatOp( upperRep );
+ afterOpMinimize( retFsm );
+ }
+ break;
+ }
+ case MinType: {
+ /* Evaluate the repeated machine. */
+ retFsm = factorWithRep->walk( pd );
+ if ( retFsm->startState->isFinState() ) {
+ warning(loc) << "applying min repetition to a machine that "
+ "accepts zero length word" << endl;
+ }
+
+ /* The start func orders need to be shifted before doing the repetition
+ * and the kleene star. */
+ pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+ if ( lowerRep == 0 ) {
+ /* Acts just like a star op on the machine to return. */
+ retFsm->starOp( );
+ afterOpMinimize( retFsm );
+ }
+ else {
+ /* Take a duplicate for the plus. */
+ FsmAp *dup = new FsmAp( *retFsm );
+
+ /* Do repetition on the first half. */
+ retFsm->repeatOp( lowerRep );
+ afterOpMinimize( retFsm );
+
+ /* Star the duplicate. */
+ dup->starOp( );
+ afterOpMinimize( dup );
+
+ /* Tak on the kleene star. */
+ retFsm->concatOp( dup );
+ afterOpMinimize( retFsm );
+ }
+ break;
+ }
+ case RangeType: {
+ /* Check for bogus range. */
+ if ( upperRep - lowerRep < 0 ) {
+ error(loc) << "invalid range repetition" << endl;
+
+ /* Return null machine as recovery. */
+ retFsm = new FsmAp();
+ retFsm->lambdaFsm();
+ }
+ else if ( lowerRep == 0 && upperRep == 0 ) {
+ /* No copies. Don't need to evaluate the factorWithRep. This
+ * defeats the purpose so give a warning. */
+ warning(loc) << "zero to zero repetitions results "
+ "in the null machine" << endl;
+
+ retFsm = new FsmAp();
+ retFsm->lambdaFsm();
+ }
+ else {
+ /* Now need to evaluate the repeated machine. */
+ retFsm = factorWithRep->walk( pd );
+ if ( retFsm->startState->isFinState() ) {
+ warning(loc) << "applying range repetition to a machine that "
+ "accepts zero length word" << endl;
+ }
+
+ /* The start func orders need to be shifted before doing both kinds
+ * of repetition. */
+ pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
+
+ if ( lowerRep == 0 ) {
+ /* Just doing max repetition. Already guarded against n == 0. */
+ retFsm->optionalRepeatOp( upperRep );
+ afterOpMinimize( retFsm );
+ }
+ else if ( lowerRep == upperRep ) {
+ /* Just doing exact repetition. Already guarded against n == 0. */
+ retFsm->repeatOp( lowerRep );
+ afterOpMinimize( retFsm );
+ }
+ else {
+ /* This is the case that 0 < lowerRep < upperRep. Take a
+ * duplicate for the optional repeat. */
+ FsmAp *dup = new FsmAp( *retFsm );
+
+ /* Do repetition on the first half. */
+ retFsm->repeatOp( lowerRep );
+ afterOpMinimize( retFsm );
+
+ /* Do optional repetition on the second half. */
+ dup->optionalRepeatOp( upperRep - lowerRep );
+ afterOpMinimize( dup );
+
+ /* Tak on the duplicate machine. */
+ retFsm->concatOp( dup );
+ afterOpMinimize( retFsm );
+ }
+ }
+ break;
+ }
+ case FactorWithNegType: {
+ /* Evaluate the Factor. Pass it up. */
+ retFsm = factorWithNeg->walk( pd );
+ break;
+ }}
+ return retFsm;
+}
+
+void FactorWithRep::makeNameTree( ParseData *pd )
+{
+ switch ( type ) {
+ case StarType:
+ case StarStarType:
+ case OptionalType:
+ case PlusType:
+ case ExactType:
+ case MaxType:
+ case MinType:
+ case RangeType:
+ factorWithRep->makeNameTree( pd );
+ break;
+ case FactorWithNegType:
+ factorWithNeg->makeNameTree( pd );
+ break;
+ }
+}
+
+void FactorWithRep::resolveNameRefs( ParseData *pd )
+{
+ switch ( type ) {
+ case StarType:
+ case StarStarType:
+ case OptionalType:
+ case PlusType:
+ case ExactType:
+ case MaxType:
+ case MinType:
+ case RangeType:
+ factorWithRep->resolveNameRefs( pd );
+ break;
+ case FactorWithNegType:
+ factorWithNeg->resolveNameRefs( pd );
+ break;
+ }
+}
+
+/* Clean up after a factor with negation node. */
+FactorWithNeg::~FactorWithNeg()
+{
+ switch ( type ) {
+ case NegateType:
+ case CharNegateType:
+ delete factorWithNeg;
+ break;
+ case FactorType:
+ delete factor;
+ break;
+ }
+}
+
+/* Evaluate a factor with negation node. */
+FsmAp *FactorWithNeg::walk( ParseData *pd )
+{
+ FsmAp *retFsm = 0;
+
+ switch ( type ) {
+ case NegateType: {
+ /* Evaluate the factorWithNeg. */
+ FsmAp *toNegate = factorWithNeg->walk( pd );
+
+ /* Negation is subtract from dot-star. */
+ retFsm = dotStarFsm( pd );
+ retFsm->subtractOp( toNegate );
+ afterOpMinimize( retFsm );
+ break;
+ }
+ case CharNegateType: {
+ /* Evaluate the factorWithNeg. */
+ FsmAp *toNegate = factorWithNeg->walk( pd );
+
+ /* CharNegation is subtract from dot. */
+ retFsm = dotFsm( pd );
+ retFsm->subtractOp( toNegate );
+ afterOpMinimize( retFsm );
+ break;
+ }
+ case FactorType: {
+ /* Evaluate the Factor. Pass it up. */
+ retFsm = factor->walk( pd );
+ break;
+ }}
+ return retFsm;
+}
+
+void FactorWithNeg::makeNameTree( ParseData *pd )
+{
+ switch ( type ) {
+ case NegateType:
+ case CharNegateType:
+ factorWithNeg->makeNameTree( pd );
+ break;
+ case FactorType:
+ factor->makeNameTree( pd );
+ break;
+ }
+}
+
+void FactorWithNeg::resolveNameRefs( ParseData *pd )
+{
+ switch ( type ) {
+ case NegateType:
+ case CharNegateType:
+ factorWithNeg->resolveNameRefs( pd );
+ break;
+ case FactorType:
+ factor->resolveNameRefs( pd );
+ break;
+ }
+}
+
+/* Clean up after a factor node. */
+Factor::~Factor()
+{
+ switch ( type ) {
+ case LiteralType:
+ delete literal;
+ break;
+ case RangeType:
+ delete range;
+ break;
+ case OrExprType:
+ delete reItem;
+ break;
+ case RegExprType:
+ delete regExpr;
+ break;
+ case ReferenceType:
+ break;
+ case ParenType:
+ delete join;
+ break;
+ case LongestMatchType:
+ delete longestMatch;
+ break;
+ }
+}
+
+/* Evaluate a factor node. */
+FsmAp *Factor::walk( ParseData *pd )
+{
+ FsmAp *rtnVal = 0;
+ switch ( type ) {
+ case LiteralType:
+ rtnVal = literal->walk( pd );
+ break;
+ case RangeType:
+ rtnVal = range->walk( pd );
+ break;
+ case OrExprType:
+ rtnVal = reItem->walk( pd, 0 );
+ break;
+ case RegExprType:
+ rtnVal = regExpr->walk( pd, 0 );
+ break;
+ case ReferenceType:
+ rtnVal = varDef->walk( pd );
+ break;
+ case ParenType:
+ rtnVal = join->walk( pd );
+ break;
+ case LongestMatchType:
+ rtnVal = longestMatch->walk( pd );
+ break;
+ }
+
+ return rtnVal;
+}
+
+void Factor::makeNameTree( ParseData *pd )
+{
+ switch ( type ) {
+ case LiteralType:
+ case RangeType:
+ case OrExprType:
+ case RegExprType:
+ break;
+ case ReferenceType:
+ varDef->makeNameTree( loc, pd );
+ break;
+ case ParenType:
+ join->makeNameTree( pd );
+ break;
+ case LongestMatchType:
+ longestMatch->makeNameTree( pd );
+ break;
+ }
+}
+
+void Factor::resolveNameRefs( ParseData *pd )
+{
+ switch ( type ) {
+ case LiteralType:
+ case RangeType:
+ case OrExprType:
+ case RegExprType:
+ break;
+ case ReferenceType:
+ varDef->resolveNameRefs( pd );
+ break;
+ case ParenType:
+ join->resolveNameRefs( pd );
+ break;
+ case LongestMatchType:
+ longestMatch->resolveNameRefs( pd );
+ break;
+ }
+}
+
+/* Clean up a range object. Must delete the two literals. */
+Range::~Range()
+{
+ delete lowerLit;
+ delete upperLit;
+}
+
+/* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
+FsmAp *Range::walk( ParseData *pd )
+{
+ /* Construct and verify the suitability of the lower end of the range. */
+ FsmAp *lowerFsm = lowerLit->walk( pd );
+ if ( !lowerFsm->checkSingleCharMachine() ) {
+ error(lowerLit->token.loc) <<
+ "bad range lower end, must be a single character" << endl;
+ }
+
+ /* Construct and verify the upper end. */
+ FsmAp *upperFsm = upperLit->walk( pd );
+ if ( !upperFsm->checkSingleCharMachine() ) {
+ error(upperLit->token.loc) <<
+ "bad range upper end, must be a single character" << endl;
+ }
+
+ /* Grab the keys from the machines, then delete them. */
+ Key lowKey = lowerFsm->startState->outList.head->lowKey;
+ Key highKey = upperFsm->startState->outList.head->lowKey;
+ delete lowerFsm;
+ delete upperFsm;
+
+ /* Validate the range. */
+ if ( lowKey > highKey ) {
+ /* Recover by setting upper to lower; */
+ error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
+ highKey = lowKey;
+ }
+
+ /* Return the range now that it is validated. */
+ FsmAp *retFsm = new FsmAp();
+ retFsm->rangeFsm( lowKey, highKey );
+ return retFsm;
+}
+
+/* Evaluate a literal object. */
+FsmAp *Literal::walk( ParseData *pd )
+{
+ /* FsmAp to return, is the alphabet signed. */
+ FsmAp *rtnVal = 0;
+
+ switch ( type ) {
+ case Number: {
+ /* Make the fsm key in int format. */
+ Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
+ /* Make the new machine. */
+ rtnVal = new FsmAp();
+ rtnVal->concatFsm( fsmKey );
+ break;
+ }
+ case LitString: {
+ /* Make the array of keys in int format. */
+ long length;
+ bool caseInsensitive;
+ char *data = prepareLitString( token.loc, token.data, token.length,
+ length, caseInsensitive );
+ Key *arr = new Key[length];
+ makeFsmKeyArray( arr, data, length, pd );
+
+ /* Make the new machine. */
+ rtnVal = new FsmAp();
+ if ( caseInsensitive )
+ rtnVal->concatFsmCI( arr, length );
+ else
+ rtnVal->concatFsm( arr, length );
+ delete[] data;
+ delete[] arr;
+ break;
+ }}
+ return rtnVal;
+}
+
+/* Clean up after a regular expression object. */
+RegExpr::~RegExpr()
+{
+ switch ( type ) {
+ case RecurseItem:
+ delete regExpr;
+ delete item;
+ break;
+ case Empty:
+ break;
+ }
+}
+
+/* Evaluate a regular expression object. */
+FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
+{
+ /* This is the root regex, pass down a pointer to this. */
+ if ( rootRegex == 0 )
+ rootRegex = this;
+
+ FsmAp *rtnVal = 0;
+ switch ( type ) {
+ case RecurseItem: {
+ /* Walk both items. */
+ rtnVal = regExpr->walk( pd, rootRegex );
+ FsmAp *fsm2 = item->walk( pd, rootRegex );
+ rtnVal->concatOp( fsm2 );
+ break;
+ }
+ case Empty: {
+ rtnVal = new FsmAp();
+ rtnVal->lambdaFsm();
+ break;
+ }
+ }
+ return rtnVal;
+}
+
+/* Clean up after an item in a regular expression. */
+ReItem::~ReItem()
+{
+ switch ( type ) {
+ case Data:
+ case Dot:
+ break;
+ case OrBlock:
+ case NegOrBlock:
+ delete orBlock;
+ break;
+ }
+}
+
+/* Evaluate a regular expression object. */
+FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
+{
+ /* The fsm to return, is the alphabet signed? */
+ FsmAp *rtnVal = 0;
+
+ switch ( type ) {
+ case Data: {
+ /* Move the data into an integer array and make a concat fsm. */
+ Key *arr = new Key[token.length];
+ makeFsmKeyArray( arr, token.data, token.length, pd );
+
+ /* Make the concat fsm. */
+ rtnVal = new FsmAp();
+ if ( rootRegex != 0 && rootRegex->caseInsensitive )
+ rtnVal->concatFsmCI( arr, token.length );
+ else
+ rtnVal->concatFsm( arr, token.length );
+ delete[] arr;
+ break;
+ }
+ case Dot: {
+ /* Make the dot fsm. */
+ rtnVal = dotFsm( pd );
+ break;
+ }
+ case OrBlock: {
+ /* Get the or block and minmize it. */
+ rtnVal = orBlock->walk( pd, rootRegex );
+ if ( rtnVal == 0 ) {
+ rtnVal = new FsmAp();
+ rtnVal->lambdaFsm();
+ }
+ rtnVal->minimizePartition2();
+ break;
+ }
+ case NegOrBlock: {
+ /* Get the or block and minimize it. */
+ FsmAp *fsm = orBlock->walk( pd, rootRegex );
+ fsm->minimizePartition2();
+
+ /* Make a dot fsm and subtract from it. */
+ rtnVal = dotFsm( pd );
+ rtnVal->subtractOp( fsm );
+ rtnVal->minimizePartition2();
+ break;
+ }
+ }
+
+ /* If the item is followed by a star, then apply the star op. */
+ if ( star ) {
+ if ( rtnVal->startState->isFinState() ) {
+ warning(loc) << "applying kleene star to a machine that "
+ "accepts zero length word" << endl;
+ }
+
+ rtnVal->starOp();
+ rtnVal->minimizePartition2();
+ }
+ return rtnVal;
+}
+
+/* Clean up after an or block of a regular expression. */
+ReOrBlock::~ReOrBlock()
+{
+ switch ( type ) {
+ case RecurseItem:
+ delete orBlock;
+ delete item;
+ break;
+ case Empty:
+ break;
+ }
+}
+
+
+/* Evaluate an or block of a regular expression. */
+FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
+{
+ FsmAp *rtnVal = 0;
+ switch ( type ) {
+ case RecurseItem: {
+ /* Evaluate the two fsm. */
+ FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
+ FsmAp *fsm2 = item->walk( pd, rootRegex );
+ if ( fsm1 == 0 )
+ rtnVal = fsm2;
+ else {
+ fsm1->unionOp( fsm2 );
+ rtnVal = fsm1;
+ }
+ break;
+ }
+ case Empty: {
+ rtnVal = 0;
+ break;
+ }
+ }
+ return rtnVal;;
+}
+
+/* Evaluate an or block item of a regular expression. */
+FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
+{
+ /* The return value, is the alphabet signed? */
+ FsmAp *rtnVal = 0;
+ switch ( type ) {
+ case Data: {
+ /* Make the or machine. */
+ rtnVal = new FsmAp();
+
+ /* Put the or data into an array of ints. Note that we find unique
+ * keys. Duplicates are silently ignored. The alternative would be to
+ * issue warning or an error but since we can't with [a0-9a] or 'a' |
+ * 'a' don't bother here. */
+ KeySet keySet;
+ makeFsmUniqueKeyArray( keySet, token.data, token.length,
+ rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
+
+ /* Run the or operator. */
+ rtnVal->orFsm( keySet.data, keySet.length() );
+ break;
+ }
+ case Range: {
+ /* Make the upper and lower keys. */
+ Key lowKey = makeFsmKeyChar( lower, pd );
+ Key highKey = makeFsmKeyChar( upper, pd );
+
+ /* Validate the range. */
+ if ( lowKey > highKey ) {
+ /* Recover by setting upper to lower; */
+ error(loc) << "lower end of range is greater then upper end" << endl;
+ highKey = lowKey;
+ }
+
+ /* Make the range machine. */
+ rtnVal = new FsmAp();
+ rtnVal->rangeFsm( lowKey, highKey );
+
+ if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
+ if ( lowKey <= 'Z' && 'A' <= highKey ) {
+ Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
+ Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
+
+ otherLow = 'a' + ( otherLow - 'A' );
+ otherHigh = 'a' + ( otherHigh - 'A' );
+
+ FsmAp *otherRange = new FsmAp();
+ otherRange->rangeFsm( otherLow, otherHigh );
+ rtnVal->unionOp( otherRange );
+ rtnVal->minimizePartition2();
+ }
+ else if ( lowKey <= 'z' && 'a' <= highKey ) {
+ Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
+ Key otherHigh = 'z' < highKey ? Key('z') : highKey;
+
+ otherLow = 'A' + ( otherLow - 'a' );
+ otherHigh = 'A' + ( otherHigh - 'a' );
+
+ FsmAp *otherRange = new FsmAp();
+ otherRange->rangeFsm( otherLow, otherHigh );
+ rtnVal->unionOp( otherRange );
+ rtnVal->minimizePartition2();
+ }
+ }
+
+ break;
+ }}
+ return rtnVal;
+}