diff options
author | smalov <smalov@yandex-team.ru> | 2022-02-10 16:47:36 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:36 +0300 |
commit | f70d9720e13aef3a935e3f405b0eac554529e76e (patch) | |
tree | 5519c392aebdb16153197de07e4774c0a2be261a /contrib/tools/ragel6/parsedata.cpp | |
parent | 7b659037613268d5eac4a1b6a7c5eff3cd36d4bf (diff) | |
download | ydb-f70d9720e13aef3a935e3f405b0eac554529e76e.tar.gz |
Restoring authorship annotation for <smalov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/tools/ragel6/parsedata.cpp')
-rw-r--r-- | contrib/tools/ragel6/parsedata.cpp | 2894 |
1 files changed, 1447 insertions, 1447 deletions
diff --git a/contrib/tools/ragel6/parsedata.cpp b/contrib/tools/ragel6/parsedata.cpp index eafe73e524..31ed5f0f12 100644 --- a/contrib/tools/ragel6/parsedata.cpp +++ b/contrib/tools/ragel6/parsedata.cpp @@ -1,131 +1,131 @@ -/* - * Copyright 2001-2008 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Ragel. - * - * Ragel is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. +/* + * Copyright 2001-2008 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Ragel. * - * Ragel is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Ragel; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <iostream> -#include <iomanip> -#include <errno.h> -#include <stdlib.h> -#include <limits.h> - -#include "ragel.h" -#include "rlparse.h" -#include "parsedata.h" -#include "parsetree.h" -#include "mergesort.h" -#include "xmlcodegen.h" -#include "version.h" -#include "inputdata.h" - -using namespace std; - -char mainMachine[] = "main"; - -void Token::set( const char *str, int len ) -{ - length = len; - data = new char[len+1]; - memcpy( data, str, len ); - data[len] = 0; -} - -void Token::append( const Token &other ) -{ - int newLength = length + other.length; - char *newString = new char[newLength+1]; - memcpy( newString, data, length ); - memcpy( newString + length, other.data, other.length ); - newString[newLength] = 0; - data = newString; - length = newLength; -} - -/* Perform minimization after an operation according - * to the command line args. */ -void afterOpMinimize( FsmAp *fsm, bool lastInSeq ) -{ - /* Switch on the prefered minimization algorithm. */ - if ( minimizeOpt == MinimizeEveryOp || ( minimizeOpt == MinimizeMostOps && lastInSeq ) ) { - /* First clean up the graph. FsmAp operations may leave these - * lying around. There should be no dead end states. The subtract - * intersection operators are the only places where they may be - * created and those operators clean them up. */ - fsm->removeUnreachableStates(); - - switch ( minimizeLevel ) { - case MinimizeApprox: - fsm->minimizeApproximate(); - break; - case MinimizePartition1: - fsm->minimizePartition1(); - break; - case MinimizePartition2: - fsm->minimizePartition2(); - break; - case MinimizeStable: - fsm->minimizeStable(); - break; - } - } -} - -/* Count the transitions in the fsm by walking the state list. */ -int countTransitions( FsmAp *fsm ) -{ - int numTrans = 0; - StateAp *state = fsm->stateList.head; - while ( state != 0 ) { - numTrans += state->outList.length(); - state = state->next; - } - return numTrans; -} - -Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd ) -{ - /* Reset errno so we can check for overflow or underflow. In the event of - * an error, sets the return val to the upper or lower bound being tested - * against. */ - errno = 0; - unsigned int size = keyOps->alphType->size; - bool unusedBits = size < sizeof(unsigned long); - - unsigned long ul = strtoul( str, 0, 16 ); - - if ( errno == ERANGE || ( unusedBits && ul >> (size * 8) ) ) { - error(loc) << "literal " << str << " overflows the alphabet type" << endl; - ul = 1 << (size * 8); - } - - if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) ) + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <iomanip> +#include <errno.h> +#include <stdlib.h> +#include <limits.h> + +#include "ragel.h" +#include "rlparse.h" +#include "parsedata.h" +#include "parsetree.h" +#include "mergesort.h" +#include "xmlcodegen.h" +#include "version.h" +#include "inputdata.h" + +using namespace std; + +char mainMachine[] = "main"; + +void Token::set( const char *str, int len ) +{ + length = len; + data = new char[len+1]; + memcpy( data, str, len ); + data[len] = 0; +} + +void Token::append( const Token &other ) +{ + int newLength = length + other.length; + char *newString = new char[newLength+1]; + memcpy( newString, data, length ); + memcpy( newString + length, other.data, other.length ); + newString[newLength] = 0; + data = newString; + length = newLength; +} + +/* Perform minimization after an operation according + * to the command line args. */ +void afterOpMinimize( FsmAp *fsm, bool lastInSeq ) +{ + /* Switch on the prefered minimization algorithm. */ + if ( minimizeOpt == MinimizeEveryOp || ( minimizeOpt == MinimizeMostOps && lastInSeq ) ) { + /* First clean up the graph. FsmAp operations may leave these + * lying around. There should be no dead end states. The subtract + * intersection operators are the only places where they may be + * created and those operators clean them up. */ + fsm->removeUnreachableStates(); + + switch ( minimizeLevel ) { + case MinimizeApprox: + fsm->minimizeApproximate(); + break; + case MinimizePartition1: + fsm->minimizePartition1(); + break; + case MinimizePartition2: + fsm->minimizePartition2(); + break; + case MinimizeStable: + fsm->minimizeStable(); + break; + } + } +} + +/* Count the transitions in the fsm by walking the state list. */ +int countTransitions( FsmAp *fsm ) +{ + int numTrans = 0; + StateAp *state = fsm->stateList.head; + while ( state != 0 ) { + numTrans += state->outList.length(); + state = state->next; + } + return numTrans; +} + +Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd ) +{ + /* Reset errno so we can check for overflow or underflow. In the event of + * an error, sets the return val to the upper or lower bound being tested + * against. */ + errno = 0; + unsigned int size = keyOps->alphType->size; + bool unusedBits = size < sizeof(unsigned long); + + unsigned long ul = strtoul( str, 0, 16 ); + + if ( errno == ERANGE || ( unusedBits && ul >> (size * 8) ) ) { + error(loc) << "literal " << str << " overflows the alphabet type" << endl; + ul = 1 << (size * 8); + } + + if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) ) ul |= ( (unsigned long)(-1L) >> (size*8) ) << (size*8); - - return Key( (long)ul ); -} - -#ifdef _MSC_VER -# define strtoll _strtoi64 -#endif - -Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd ) -{ + + return Key( (long)ul ); +} + +#ifdef _MSC_VER +# define strtoll _strtoi64 +#endif + +Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd ) +{ if ( keyOps->alphType->isSigned ) { /* Convert the number to a decimal. First reset errno so we can check * for overflow or underflow. */ @@ -147,7 +147,7 @@ Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd ) } return Key( (long)ll ); - } + } else { /* Convert the number to a decimal. First reset errno so we can check * for overflow or underflow. */ @@ -169,1323 +169,1323 @@ Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd ) } return Key( (unsigned long)ull ); - } -} - -/* Make an fsm key in int format (what the fsm graph uses) from an alphabet - * number returned by the parser. Validates that the number doesn't overflow - * the alphabet type. */ -Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd ) -{ - /* Switch on hex/decimal format. */ - if ( str[0] == '0' && str[1] == 'x' ) - return makeFsmKeyHex( str, loc, pd ); - else - return makeFsmKeyDec( str, loc, pd ); -} - -/* Make an fsm int format (what the fsm graph uses) from a single character. - * Performs proper conversion depending on signed/unsigned property of the - * alphabet. */ -Key makeFsmKeyChar( char c, ParseData *pd ) -{ - if ( keyOps->isSigned ) { - /* Copy from a char type. */ - return Key( c ); - } - else { - /* Copy from an unsigned byte type. */ - return Key( (unsigned char)c ); - } -} - -/* Make an fsm key array in int format (what the fsm graph uses) from a string - * of characters. Performs proper conversion depending on signed/unsigned - * property of the alphabet. */ -void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd ) -{ - if ( keyOps->isSigned ) { - /* Copy from a char star type. */ - char *src = data; - for ( int i = 0; i < len; i++ ) - result[i] = Key(src[i]); - } - else { - /* Copy from an unsigned byte ptr type. */ - unsigned char *src = (unsigned char*) data; - for ( int i = 0; i < len; i++ ) - result[i] = Key(src[i]); - } -} - -/* Like makeFsmKeyArray except the result has only unique keys. They ordering - * will be changed. */ -void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, - bool caseInsensitive, ParseData *pd ) -{ - /* Use a transitions list for getting unique keys. */ - if ( keyOps->isSigned ) { - /* Copy from a char star type. */ - char *src = data; - for ( int si = 0; si < len; si++ ) { - Key key( src[si] ); - result.insert( key ); - if ( caseInsensitive ) { - if ( key.isLower() ) - result.insert( key.toUpper() ); - else if ( key.isUpper() ) - result.insert( key.toLower() ); - } - } - } - else { - /* Copy from an unsigned byte ptr type. */ - unsigned char *src = (unsigned char*) data; - for ( int si = 0; si < len; si++ ) { - Key key( src[si] ); - result.insert( key ); - if ( caseInsensitive ) { - if ( key.isLower() ) - result.insert( key.toUpper() ); - else if ( key.isUpper() ) - result.insert( key.toLower() ); - } - } - } -} - -FsmAp *dotFsm( ParseData *pd ) -{ - FsmAp *retFsm = new FsmAp(); - retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey ); - return retFsm; -} - -FsmAp *dotStarFsm( ParseData *pd ) -{ - FsmAp *retFsm = new FsmAp(); - retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey ); - return retFsm; -} - -/* Make a builtin type. Depends on the signed nature of the alphabet type. */ -FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd ) -{ - /* FsmAp created to return. */ - FsmAp *retFsm = 0; - bool isSigned = keyOps->isSigned; - - switch ( builtin ) { - case BT_Any: { - /* All characters. */ - retFsm = dotFsm( pd ); - break; - } - case BT_Ascii: { - /* Ascii characters 0 to 127. */ - retFsm = new FsmAp(); - retFsm->rangeFsm( 0, 127 ); - break; - } - case BT_Extend: { - /* Ascii extended characters. This is the full byte range. Dependent - * on signed, vs no signed. If the alphabet is one byte then just use - * dot fsm. */ - if ( isSigned ) { - retFsm = new FsmAp(); - retFsm->rangeFsm( -128, 127 ); - } - else { - retFsm = new FsmAp(); - retFsm->rangeFsm( 0, 255 ); - } - break; - } - case BT_Alpha: { - /* Alpha [A-Za-z]. */ - FsmAp *upper = new FsmAp(), *lower = new FsmAp(); - upper->rangeFsm( 'A', 'Z' ); - lower->rangeFsm( 'a', 'z' ); - upper->unionOp( lower ); - upper->minimizePartition2(); - retFsm = upper; - break; - } - case BT_Digit: { - /* Digits [0-9]. */ - retFsm = new FsmAp(); - retFsm->rangeFsm( '0', '9' ); - break; - } - case BT_Alnum: { - /* Alpha numerics [0-9A-Za-z]. */ - FsmAp *digit = new FsmAp(), *lower = new FsmAp(); - FsmAp *upper = new FsmAp(); - digit->rangeFsm( '0', '9' ); - upper->rangeFsm( 'A', 'Z' ); - lower->rangeFsm( 'a', 'z' ); - digit->unionOp( upper ); - digit->unionOp( lower ); - digit->minimizePartition2(); - retFsm = digit; - break; - } - case BT_Lower: { - /* Lower case characters. */ - retFsm = new FsmAp(); - retFsm->rangeFsm( 'a', 'z' ); - break; - } - case BT_Upper: { - /* Upper case characters. */ - retFsm = new FsmAp(); - retFsm->rangeFsm( 'A', 'Z' ); - break; - } - case BT_Cntrl: { - /* Control characters. */ - FsmAp *cntrl = new FsmAp(); - FsmAp *highChar = new FsmAp(); - cntrl->rangeFsm( 0, 31 ); - highChar->concatFsm( 127 ); - cntrl->unionOp( highChar ); - cntrl->minimizePartition2(); - retFsm = cntrl; - break; - } - case BT_Graph: { - /* Graphical ascii characters [!-~]. */ - retFsm = new FsmAp(); - retFsm->rangeFsm( '!', '~' ); - break; - } - case BT_Print: { - /* Printable characters. Same as graph except includes space. */ - retFsm = new FsmAp(); - retFsm->rangeFsm( ' ', '~' ); - break; - } - case BT_Punct: { - /* Punctuation. */ - FsmAp *range1 = new FsmAp(); - FsmAp *range2 = new FsmAp(); - FsmAp *range3 = new FsmAp(); - FsmAp *range4 = new FsmAp(); - range1->rangeFsm( '!', '/' ); - range2->rangeFsm( ':', '@' ); - range3->rangeFsm( '[', '`' ); - range4->rangeFsm( '{', '~' ); - range1->unionOp( range2 ); - range1->unionOp( range3 ); - range1->unionOp( range4 ); - range1->minimizePartition2(); - retFsm = range1; - break; - } - case BT_Space: { - /* Whitespace: [\t\v\f\n\r ]. */ - FsmAp *cntrl = new FsmAp(); - FsmAp *space = new FsmAp(); - cntrl->rangeFsm( '\t', '\r' ); - space->concatFsm( ' ' ); - cntrl->unionOp( space ); - cntrl->minimizePartition2(); - retFsm = cntrl; - break; - } - case BT_Xdigit: { - /* Hex digits [0-9A-Fa-f]. */ - FsmAp *digit = new FsmAp(); - FsmAp *upper = new FsmAp(); - FsmAp *lower = new FsmAp(); - digit->rangeFsm( '0', '9' ); - upper->rangeFsm( 'A', 'F' ); - lower->rangeFsm( 'a', 'f' ); - digit->unionOp( upper ); - digit->unionOp( lower ); - digit->minimizePartition2(); - retFsm = digit; - break; - } - case BT_Lambda: { - retFsm = new FsmAp(); - retFsm->lambdaFsm(); - break; - } - case BT_Empty: { - retFsm = new FsmAp(); - retFsm->emptyFsm(); - break; - }} - - return retFsm; -} - -/* Check if this name inst or any name inst below is referenced. */ -bool NameInst::anyRefsRec() -{ - if ( numRefs > 0 ) - return true; - - /* Recurse on children until true. */ - for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) { - if ( (*ch)->anyRefsRec() ) - return true; - } - - return false; -} - -/* - * ParseData - */ - -/* Initialize the structure that will collect info during the parse of a - * machine. */ -ParseData::ParseData( const char *fileName, char *sectionName, - const InputLoc §ionLoc ) -: - sectionGraph(0), - generatingSectionSubset(false), - nextPriorKey(0), - /* 0 is reserved for global error actions. */ - nextLocalErrKey(1), - nextNameId(0), - nextCondId(0), - alphTypeSet(false), - getKeyExpr(0), - accessExpr(0), - prePushExpr(0), - postPopExpr(0), - pExpr(0), - peExpr(0), - eofExpr(0), - csExpr(0), - topExpr(0), - stackExpr(0), - actExpr(0), - tokstartExpr(0), - tokendExpr(0), - dataExpr(0), - lowerNum(0), - upperNum(0), - fileName(fileName), - sectionName(sectionName), - sectionLoc(sectionLoc), - curActionOrd(0), - curPriorOrd(0), - rootName(0), - exportsRootName(0), - nextEpsilonResolvedLink(0), - nextLongestMatchId(1), - lmRequiresErrorState(false), - cgd(0) -{ - /* Initialize the dictionary of graphs. This is our symbol table. The - * initialization needs to be done on construction which happens at the - * beginning of a machine spec so any assignment operators can reference - * the builtins. */ - initGraphDict(); -} - -/* Clean up the data collected during a parse. */ -ParseData::~ParseData() -{ - /* Delete all the nodes in the action list. Will cause all the - * string data that represents the actions to be deallocated. */ - actionList.empty(); -} - -/* Make a name id in the current name instantiation scope if it is not - * already there. */ -NameInst *ParseData::addNameInst( const InputLoc &loc, const char *data, bool isLabel ) -{ - /* Create the name instantitaion object and insert it. */ - NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel ); - curNameInst->childVect.append( newNameInst ); - if ( data != 0 ) - curNameInst->children.insertMulti( data, newNameInst ); - return newNameInst; -} - -void ParseData::initNameWalk() -{ - curNameInst = rootName; - curNameChild = 0; -} - -void ParseData::initExportsNameWalk() -{ - curNameInst = exportsRootName; - curNameChild = 0; -} - -/* Goes into the next child scope. The number of the child is already set up. - * We need this for the syncronous name tree and parse tree walk to work - * properly. It is reset on entry into a scope and advanced on poping of a - * scope. A call to enterNameScope should be accompanied by a corresponding - * popNameScope. */ -NameFrame ParseData::enterNameScope( bool isLocal, int numScopes ) -{ - /* Save off the current data. */ - NameFrame retFrame; - retFrame.prevNameInst = curNameInst; - retFrame.prevNameChild = curNameChild; - retFrame.prevLocalScope = localNameScope; - - /* Enter into the new name scope. */ - for ( int i = 0; i < numScopes; i++ ) { - curNameInst = curNameInst->childVect[curNameChild]; - curNameChild = 0; - } - - if ( isLocal ) - localNameScope = curNameInst; - - return retFrame; -} - -/* Return from a child scope to a parent. The parent info must be specified as - * an argument and is obtained from the corresponding call to enterNameScope. - * */ -void ParseData::popNameScope( const NameFrame &frame ) -{ - /* Pop the name scope. */ - curNameInst = frame.prevNameInst; - curNameChild = frame.prevNameChild+1; - localNameScope = frame.prevLocalScope; -} - -void ParseData::resetNameScope( const NameFrame &frame ) -{ - /* Pop the name scope. */ - curNameInst = frame.prevNameInst; - curNameChild = frame.prevNameChild; - localNameScope = frame.prevLocalScope; -} - - -void ParseData::unsetObsoleteEntries( FsmAp *graph ) -{ - /* Loop the reference names and increment the usage. Names that are no - * longer needed will be unset in graph. */ - for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) { - /* Get the name. */ - NameInst *name = *ref; - name->numUses += 1; - - /* If the name is no longer needed unset its corresponding entry. */ - if ( name->numUses == name->numRefs ) { - assert( graph->entryPoints.find( name->id ) != 0 ); - graph->unsetEntry( name->id ); - assert( graph->entryPoints.find( name->id ) == 0 ); - } - } -} - -NameSet ParseData::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly ) -{ - /* Queue needed for breadth-first search, load it with the start node. */ - NameInstList nameQueue; - nameQueue.append( refFrom ); - - NameSet result; - while ( nameQueue.length() > 0 ) { - /* Pull the next from location off the queue. */ - NameInst *from = nameQueue.detachFirst(); - - /* Look for the name. */ - NameMapEl *low, *high; - if ( from->children.findMulti( data, low, high ) ) { - /* Record all instances of the name. */ - for ( ; low <= high; low++ ) - result.insert( low->value ); - } - - /* Name not there, do breadth-first operation of appending all - * childrent to the processing queue. */ - for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) { - if ( !recLabelsOnly || (*name)->isLabel ) - nameQueue.append( *name ); - } - } - - /* Queue exhausted and name never found. */ - return result; -} - -void ParseData::resolveFrom( NameSet &result, NameInst *refFrom, - const NameRef &nameRef, int namePos ) -{ - /* Look for the name in the owning scope of the factor with aug. */ - NameSet partResult = resolvePart( refFrom, nameRef[namePos], false ); - - /* If there are more parts to the name then continue on. */ - if ( ++namePos < nameRef.length() ) { - /* There are more components to the name, search using all the part - * results as the base. */ - for ( NameSet::Iter name = partResult; name.lte(); name++ ) - resolveFrom( result, *name, nameRef, namePos ); - } - else { - /* This is the last component, append the part results to the final - * results. */ - result.insert( partResult ); - } -} - -/* Write out a name reference. */ -ostream &operator<<( ostream &out, const NameRef &nameRef ) -{ - int pos = 0; - if ( nameRef[pos] == 0 ) { - out << "::"; - pos += 1; - } - out << nameRef[pos++]; - for ( ; pos < nameRef.length(); pos++ ) - out << "::" << nameRef[pos]; - return out; -} - -ostream &operator<<( ostream &out, const NameInst &nameInst ) -{ - /* Count the number fully qualified name parts. */ - int numParents = 0; - NameInst *curParent = nameInst.parent; - while ( curParent != 0 ) { - numParents += 1; - curParent = curParent->parent; - } - - /* Make an array and fill it in. */ - curParent = nameInst.parent; - NameInst **parents = new NameInst*[numParents]; - for ( int p = numParents-1; p >= 0; p-- ) { - parents[p] = curParent; - curParent = curParent->parent; - } - - /* Write the parents out, skip the root. */ - for ( int p = 1; p < numParents; p++ ) - out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" ); - - /* Write the name and cleanup. */ - out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" ); - delete[] parents; - return out; -} - -struct CmpNameInstLoc -{ - static int compare( const NameInst *ni1, const NameInst *ni2 ) - { - if ( ni1->loc.line < ni2->loc.line ) - return -1; - else if ( ni1->loc.line > ni2->loc.line ) - return 1; - else if ( ni1->loc.col < ni2->loc.col ) - return -1; - else if ( ni1->loc.col > ni2->loc.col ) - return 1; - return 0; - } -}; - -void errorStateLabels( const NameSet &resolved ) -{ - MergeSort<NameInst*, CmpNameInstLoc> mergeSort; - mergeSort.sort( resolved.data, resolved.length() ); - for ( NameSet::Iter res = resolved; res.lte(); res++ ) - error((*res)->loc) << " -> " << **res << endl; -} - - -NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action ) -{ - NameInst *nameInst = 0; - - /* Do the local search if the name is not strictly a root level name - * search. */ - if ( nameRef[0] != 0 ) { - /* If the action is referenced, resolve all of them. */ - if ( action != 0 && action->actionRefs.length() > 0 ) { - /* Look for the name in all referencing scopes. */ - NameSet resolved; - for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ ) - resolveFrom( resolved, *actRef, nameRef, 0 ); - - if ( resolved.length() > 0 ) { - /* Take the first one. */ - nameInst = resolved[0]; - if ( resolved.length() > 1 ) { - /* Complain about the multiple references. */ - error(loc) << "state reference " << nameRef << - " resolves to multiple entry points" << endl; - errorStateLabels( resolved ); - } - } - } - } - - /* If not found in the local scope, look in global. */ - if ( nameInst == 0 ) { - NameSet resolved; - int fromPos = nameRef[0] != 0 ? 0 : 1; - resolveFrom( resolved, rootName, nameRef, fromPos ); - - if ( resolved.length() > 0 ) { - /* Take the first. */ - nameInst = resolved[0]; - if ( resolved.length() > 1 ) { - /* Complain about the multiple references. */ - error(loc) << "state reference " << nameRef << - " resolves to multiple entry points" << endl; - errorStateLabels( resolved ); - } - } - } - - if ( nameInst == 0 ) { - /* If not found then complain. */ - error(loc) << "could not resolve state reference " << nameRef << endl; - } - return nameInst; -} - -void ParseData::resolveNameRefs( InlineList *inlineList, Action *action ) -{ - for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { - switch ( item->type ) { - case InlineItem::Entry: case InlineItem::Goto: - case InlineItem::Call: case InlineItem::Next: { - /* Resolve, pass action for local search. */ - NameInst *target = resolveStateRef( *item->nameRef, item->loc, action ); - - /* Name lookup error reporting is handled by resolveStateRef. */ - if ( target != 0 ) { - /* Check if the target goes into a longest match. */ - NameInst *search = target->parent; - while ( search != 0 ) { - if ( search->isLongestMatch ) { - error(item->loc) << "cannot enter inside a longest " - "match construction as an entry point" << endl; - break; - } - search = search->parent; - } - - /* Record the reference in the name. This will cause the - * entry point to survive to the end of the graph - * generating walk. */ - target->numRefs += 1; - } - - item->nameTarg = target; - break; - } - default: - break; - } - - /* Some of the item types may have children. */ - if ( item->children != 0 ) - resolveNameRefs( item->children, action ); - } -} - -/* Resolve references to labels in actions. */ -void ParseData::resolveActionNameRefs() -{ - for ( ActionList::Iter act = actionList; act.lte(); act++ ) { - /* Only care about the actions that are referenced. */ - if ( act->actionRefs.length() > 0 ) - resolveNameRefs( act->inlineList, act ); - } -} - -/* Walk a name tree starting at from and fill the name index. */ -void ParseData::fillNameIndex( NameInst *from ) -{ - /* Fill the value for from in the name index. */ - nameIndex[from->id] = from; - - /* Recurse on the implicit final state and then all children. */ - if ( from->final != 0 ) - fillNameIndex( from->final ); - for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) - fillNameIndex( *name ); -} - -void ParseData::makeRootNames() -{ - /* Create the root name. */ - rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false ); - exportsRootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false ); -} - -/* Build the name tree and supporting data structures. */ -void ParseData::makeNameTree( GraphDictEl *dictEl ) -{ - /* Set up curNameInst for the walk. */ - initNameWalk(); - - if ( dictEl != 0 ) { - /* A start location has been specified. */ - dictEl->value->makeNameTree( dictEl->loc, this ); - } - else { - /* First make the name tree. */ - for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) { - /* Recurse on the instance. */ - glel->value->makeNameTree( glel->loc, this ); - } - } - - /* The number of nodes in the tree can now be given by nextNameId */ - nameIndex = new NameInst*[nextNameId]; - memset( nameIndex, 0, sizeof(NameInst*)*nextNameId ); - fillNameIndex( rootName ); - fillNameIndex( exportsRootName ); -} - - -void ParseData::createBuiltin( const char *name, BuiltinMachine builtin ) -{ - Expression *expression = new Expression( builtin ); - Join *join = new Join( expression ); - MachineDef *machineDef = new MachineDef( join ); - VarDef *varDef = new VarDef( name, machineDef ); - GraphDictEl *graphDictEl = new GraphDictEl( name, varDef ); - graphDict.insert( graphDictEl ); -} - -/* Initialize the graph dict with builtin types. */ -void ParseData::initGraphDict( ) -{ - createBuiltin( "any", BT_Any ); - createBuiltin( "ascii", BT_Ascii ); - createBuiltin( "extend", BT_Extend ); - createBuiltin( "alpha", BT_Alpha ); - createBuiltin( "digit", BT_Digit ); - createBuiltin( "alnum", BT_Alnum ); - createBuiltin( "lower", BT_Lower ); - createBuiltin( "upper", BT_Upper ); - createBuiltin( "cntrl", BT_Cntrl ); - createBuiltin( "graph", BT_Graph ); - createBuiltin( "print", BT_Print ); - createBuiltin( "punct", BT_Punct ); - createBuiltin( "space", BT_Space ); - createBuiltin( "xdigit", BT_Xdigit ); - createBuiltin( "null", BT_Lambda ); - createBuiltin( "zlen", BT_Lambda ); - createBuiltin( "empty", BT_Empty ); -} - -/* Set the alphabet type. If the types are not valid returns false. */ -bool ParseData::setAlphType( const InputLoc &loc, char *s1, char *s2 ) -{ - alphTypeLoc = loc; - userAlphType = findAlphType( s1, s2 ); - alphTypeSet = true; - return userAlphType != 0; -} - -/* Set the alphabet type. If the types are not valid returns false. */ -bool ParseData::setAlphType( const InputLoc &loc, char *s1 ) -{ - alphTypeLoc = loc; - userAlphType = findAlphType( s1 ); - alphTypeSet = true; - return userAlphType != 0; -} - -bool ParseData::setVariable( char *var, InlineList *inlineList ) -{ - bool set = true; - - if ( strcmp( var, "p" ) == 0 ) - pExpr = inlineList; - else if ( strcmp( var, "pe" ) == 0 ) - peExpr = inlineList; - else if ( strcmp( var, "eof" ) == 0 ) - eofExpr = inlineList; - else if ( strcmp( var, "cs" ) == 0 ) - csExpr = inlineList; - else if ( strcmp( var, "data" ) == 0 ) - dataExpr = inlineList; - else if ( strcmp( var, "top" ) == 0 ) - topExpr = inlineList; - else if ( strcmp( var, "stack" ) == 0 ) - stackExpr = inlineList; - else if ( strcmp( var, "act" ) == 0 ) - actExpr = inlineList; - else if ( strcmp( var, "ts" ) == 0 ) - tokstartExpr = inlineList; - else if ( strcmp( var, "te" ) == 0 ) - tokendExpr = inlineList; - else - set = false; - - return set; -} - -/* Initialize the key operators object that will be referenced by all fsms - * created. */ -void ParseData::initKeyOps( ) -{ - /* Signedness and bounds. */ - HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType; - thisKeyOps.setAlphType( alphType ); - - if ( lowerNum != 0 ) { - /* If ranges are given then interpret the alphabet type. */ - thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this ); - thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this ); - } - - thisCondData.lastCondKey = thisKeyOps.maxKey; -} - -void ParseData::printNameInst( NameInst *nameInst, int level ) -{ - for ( int i = 0; i < level; i++ ) - cerr << " "; - cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") << - " id: " << nameInst->id << - " refs: " << nameInst->numRefs << - " uses: " << nameInst->numUses << endl; - for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ ) - printNameInst( *name, level+1 ); -} - -/* Remove duplicates of unique actions from an action table. */ -void ParseData::removeDups( ActionTable &table ) -{ - /* Scan through the table looking for unique actions to - * remove duplicates of. */ - for ( int i = 0; i < table.length(); i++ ) { - /* Remove any duplicates ahead of i. */ - for ( int r = i+1; r < table.length(); ) { - if ( table[r].value == table[i].value ) - table.vremove(r); - else - r += 1; - } - } -} - -/* Remove duplicates from action lists. This operates only on transition and - * eof action lists and so should be called once all actions have been - * transfered to their final resting place. */ -void ParseData::removeActionDups( FsmAp *graph ) -{ - /* Loop all states. */ - for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { - /* Loop all transitions. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) - removeDups( trans->actionTable ); - removeDups( state->toStateActionTable ); - removeDups( state->fromStateActionTable ); - removeDups( state->eofActionTable ); - } -} - -Action *ParseData::newAction( const char *name, InlineList *inlineList ) -{ - InputLoc loc; - loc.line = 1; - loc.col = 1; - loc.fileName = "NONE"; - - Action *action = new Action( loc, name, inlineList, nextCondId++ ); - action->actionRefs.append( rootName ); - actionList.append( action ); - return action; -} - -void ParseData::initLongestMatchData() -{ - if ( lmList.length() > 0 ) { - /* The initTokStart action resets the token start. */ - InlineList *il1 = new InlineList; - il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) ); - initTokStart = newAction( "initts", il1 ); - initTokStart->isLmAction = true; - - /* The initActId action gives act a default value. */ - InlineList *il4 = new InlineList; - il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) ); - initActId = newAction( "initact", il4 ); - initActId->isLmAction = true; - - /* The setTokStart action sets tokstart. */ - InlineList *il5 = new InlineList; - il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) ); - setTokStart = newAction( "ts", il5 ); - setTokStart->isLmAction = true; - - /* The setTokEnd action sets tokend. */ - InlineList *il3 = new InlineList; - il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) ); - setTokEnd = newAction( "te", il3 ); - setTokEnd->isLmAction = true; - - /* The action will also need an ordering: ahead of all user action - * embeddings. */ - initTokStartOrd = curActionOrd++; - initActIdOrd = curActionOrd++; - setTokStartOrd = curActionOrd++; - setTokEndOrd = curActionOrd++; - } -} - -/* After building the graph, do some extra processing to ensure the runtime - * data of the longest mactch operators is consistent. */ -void ParseData::setLongestMatchData( FsmAp *graph ) -{ - if ( lmList.length() > 0 ) { - /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry) - * init the tokstart. */ - for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) { - /* This is run after duplicates are removed, we must guard against - * inserting a duplicate. */ - ActionTable &actionTable = en->value->toStateActionTable; - if ( ! actionTable.hasAction( initTokStart ) ) - actionTable.setAction( initTokStartOrd, initTokStart ); - } - - /* Find the set of states that are the target of transitions with - * actions that have calls. These states will be targeted by fret - * statements. */ - StateSet states; - for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) { - if ( ati->value->anyCall && trans->toState != 0 ) - states.insert( trans->toState ); - } - } - } - - - /* Init tokstart upon entering the above collected states. */ - for ( StateSet::Iter ps = states; ps.lte(); ps++ ) { - /* This is run after duplicates are removed, we must guard against - * inserting a duplicate. */ - ActionTable &actionTable = (*ps)->toStateActionTable; - if ( ! actionTable.hasAction( initTokStart ) ) - actionTable.setAction( initTokStartOrd, initTokStart ); - } - } -} - -/* Make the graph from a graph dict node. Does minimization and state sorting. */ -FsmAp *ParseData::makeInstance( GraphDictEl *gdNode ) -{ - /* Build the graph from a walk of the parse tree. */ - FsmAp *graph = gdNode->value->walk( this ); - - /* Resolve any labels that point to multiple states. Any labels that are - * still around are referenced only by gotos and calls and they need to be - * made into deterministic entry points. */ - graph->deterministicEntry(); - - /* - * All state construction is now complete. - */ - - /* Transfer actions from the out action tables to eof action tables. */ - for ( StateSet::Iter state = graph->finStateSet; state.lte(); state++ ) - graph->transferOutActions( *state ); - - /* Transfer global error actions. */ - for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) - graph->transferErrorActions( state, 0 ); - - if ( ::wantDupsRemoved ) - removeActionDups( graph ); - - /* Remove unreachable states. There should be no dead end states. The - * subtract and intersection operators are the only places where they may - * be created and those operators clean them up. */ - graph->removeUnreachableStates(); - - /* No more fsm operations are to be done. Action ordering numbers are - * no longer of use and will just hinder minimization. Clear them. */ - graph->nullActionKeys(); - - /* Transition priorities are no longer of use. We can clear them - * because they will just hinder minimization as well. Clear them. */ - graph->clearAllPriorities(); - - if ( minimizeOpt != MinimizeNone ) { - /* Minimize here even if we minimized at every op. Now that function - * keys have been cleared we may get a more minimal fsm. */ - switch ( minimizeLevel ) { - case MinimizeApprox: - graph->minimizeApproximate(); - break; - case MinimizeStable: - graph->minimizeStable(); - break; - case MinimizePartition1: - graph->minimizePartition1(); - break; - case MinimizePartition2: - graph->minimizePartition2(); - break; - } - } - - graph->compressTransitions(); - - return graph; -} - -void ParseData::printNameTree() -{ - /* Print the name instance map. */ - for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ ) - printNameInst( *name, 0 ); - - cerr << "name index:" << endl; - /* Show that the name index is correct. */ - for ( int ni = 0; ni < nextNameId; ni++ ) { - cerr << ni << ": "; - const char *name = nameIndex[ni]->name; - cerr << ( name != 0 ? name : "<ANON>" ) << endl; - } -} - -FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode ) -{ - /* Build the name tree and supporting data structures. */ - makeNameTree( gdNode ); - - /* Resove name references from gdNode. */ - initNameWalk(); - gdNode->value->resolveNameRefs( this ); - - /* Do not resolve action references. Since we are not building the entire - * graph there's a good chance that many name references will fail. This - * is okay since generating part of the graph is usually only done when - * inspecting the compiled machine. */ - - /* Same story for extern entry point references. */ - - /* Flag this case so that the XML code generator is aware that we haven't - * looked up name references in actions. It can then avoid segfaulting. */ - generatingSectionSubset = true; - - /* Just building the specified graph. */ - initNameWalk(); - FsmAp *mainGraph = makeInstance( gdNode ); - - return mainGraph; -} - -FsmAp *ParseData::makeAll() -{ - /* Build the name tree and supporting data structures. */ - makeNameTree( 0 ); - - /* Resove name references in the tree. */ - initNameWalk(); - for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) - glel->value->resolveNameRefs( this ); - - /* Resolve action code name references. */ - resolveActionNameRefs(); - - /* Force name references to the top level instantiations. */ - for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ ) - (*inst)->numRefs += 1; - - FsmAp *mainGraph = 0; - FsmAp **graphs = new FsmAp*[instanceList.length()]; - int numOthers = 0; - - /* Make all the instantiations, we know that main exists in this list. */ - initNameWalk(); - for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) { - if ( strcmp( glel->key, mainMachine ) == 0 ) { - /* Main graph is always instantiated. */ - mainGraph = makeInstance( glel ); - } - else { - /* Instantiate and store in others array. */ - graphs[numOthers++] = makeInstance( glel ); - } - } - - if ( mainGraph == 0 ) - mainGraph = graphs[--numOthers]; - - if ( numOthers > 0 ) { - /* Add all the other graphs into main. */ - mainGraph->globOp( graphs, numOthers ); - } - - delete[] graphs; - return mainGraph; -} - -void ParseData::analyzeAction( Action *action, InlineList *inlineList ) -{ - /* FIXME: Actions used as conditions should be very constrained. */ - for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { - if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr ) - action->anyCall = true; - - /* Need to recurse into longest match items. */ - if ( item->type == InlineItem::LmSwitch ) { - LongestMatch *lm = item->longestMatch; - for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) { - if ( lmi->action != 0 ) - analyzeAction( action, lmi->action->inlineList ); - } - } - - if ( item->type == InlineItem::LmOnLast || - item->type == InlineItem::LmOnNext || - item->type == InlineItem::LmOnLagBehind ) - { - LongestMatchPart *lmi = item->longestMatchPart; - if ( lmi->action != 0 ) - analyzeAction( action, lmi->action->inlineList ); - } - - if ( item->children != 0 ) - analyzeAction( action, item->children ); - } -} - - -/* Check actions for bad uses of fsm directives. We don't go inside longest - * match items in actions created by ragel, since we just want the user - * actions. */ -void ParseData::checkInlineList( Action *act, InlineList *inlineList ) -{ - for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { - /* EOF checks. */ - if ( act->numEofRefs > 0 ) { - switch ( item->type ) { - /* Currently no checks. */ - default: - break; - } - } - - /* Recurse. */ - if ( item->children != 0 ) - checkInlineList( act, item->children ); - } -} - -void ParseData::checkAction( Action *action ) -{ - /* Check for actions with calls that are embedded within a longest match - * machine. */ - if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) { - for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) { - NameInst *check = *ar; - while ( check != 0 ) { - if ( check->isLongestMatch ) { - error(action->loc) << "within a scanner, fcall is permitted" - " only in pattern actions" << endl; - break; - } - check = check->parent; - } - } - } - - checkInlineList( action, action->inlineList ); -} - - -void ParseData::analyzeGraph( FsmAp *graph ) -{ - for ( ActionList::Iter act = actionList; act.lte(); act++ ) - analyzeAction( act, act->inlineList ); - - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - /* The transition list. */ - for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { - for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ ) - at->value->numTransRefs += 1; - } - - for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ ) - at->value->numToStateRefs += 1; - - for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ ) - at->value->numFromStateRefs += 1; - - for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ ) - at->value->numEofRefs += 1; - - for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) { - for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ ) - (*sci)->numCondRefs += 1; - } - } - - /* Checks for bad usage of directives in action code. */ - for ( ActionList::Iter act = actionList; act.lte(); act++ ) - checkAction( act ); -} - -void ParseData::makeExportsNameTree() -{ - /* Make a name tree for the exports. */ - initExportsNameWalk(); - - /* First make the name tree. */ - for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) { - if ( gdel->value->isExport ) { - /* Recurse on the instance. */ - gdel->value->makeNameTree( gdel->loc, this ); - } - } -} - -void ParseData::makeExports() -{ - makeExportsNameTree(); - - /* Resove name references in the tree. */ - initExportsNameWalk(); - for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) { - if ( gdel->value->isExport ) - gdel->value->resolveNameRefs( this ); - } - - /* Make all the instantiations, we know that main exists in this list. */ - initExportsNameWalk(); - for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) { - /* Check if this var def is an export. */ - if ( gdel->value->isExport ) { - /* Build the graph from a walk of the parse tree. */ - FsmAp *graph = gdel->value->walk( this ); - - /* Build the graph from a walk of the parse tree. */ - if ( !graph->checkSingleCharMachine() ) { - error(gdel->loc) << "bad export machine, must define " - "a single character" << endl; - } - else { - /* Safe to extract the key and declare the export. */ - Key exportKey = graph->startState->outList.head->lowKey; - exportList.append( new Export( gdel->value->name, exportKey ) ); - } - } - } - -} - -/* Construct the machine and catch failures which can occur during - * construction. */ -void ParseData::prepareMachineGen( GraphDictEl *graphDictEl ) -{ - try { - /* This machine construction can fail. */ - prepareMachineGenTBWrapped( graphDictEl ); - } - catch ( FsmConstructFail fail ) { - switch ( fail.reason ) { - case FsmConstructFail::CondNoKeySpace: { - InputLoc &loc = alphTypeSet ? alphTypeLoc : sectionLoc; - error(loc) << "sorry, no more characters are " - "available in the alphabet space" << endl; - error(loc) << " for conditions, please use a " - "smaller alphtype or reduce" << endl; - error(loc) << " the span of characters on which " - "conditions are embedded" << endl; - break; - } - } - } -} - -void ParseData::prepareMachineGenTBWrapped( GraphDictEl *graphDictEl ) -{ - beginProcessing(); - initKeyOps(); - makeRootNames(); - initLongestMatchData(); - - /* Make the graph, do minimization. */ - if ( graphDictEl == 0 ) - sectionGraph = makeAll(); - else - sectionGraph = makeSpecific( graphDictEl ); - - /* Compute exports from the export definitions. */ - makeExports(); - - /* If any errors have occured in the input file then don't write anything. */ - if ( gblErrorCount > 0 ) - return; - - analyzeGraph( sectionGraph ); - - /* Depends on the graph analysis. */ - setLongestMatchData( sectionGraph ); - - /* Decide if an error state is necessary. - * 1. There is an error transition - * 2. There is a gap in the transitions - * 3. The longest match operator requires it. */ - if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() ) - sectionGraph->errState = sectionGraph->addState(); - - /* State numbers need to be assigned such that all final states have a - * larger state id number than all non-final states. This enables the - * first_final mechanism to function correctly. We also want states to be - * ordered in a predictable fashion. So we first apply a depth-first - * search, then do a stable sort by final state status, then assign - * numbers. */ - - sectionGraph->depthFirstOrdering(); - sectionGraph->sortStatesByFinal(); - sectionGraph->setStateNumbers( 0 ); -} - -void ParseData::generateReduced( InputData &inputData ) -{ - beginProcessing(); - - cgd = makeCodeGen( inputData.inputFileName, sectionName, *inputData.outStream ); - - /* Make the generator. */ - BackendGen backendGen( sectionName, this, sectionGraph, cgd ); - - /* Write out with it. */ - backendGen.makeBackend(); - - if ( printStatistics ) { - cerr << "fsm name : " << sectionName << endl; - cerr << "num states: " << sectionGraph->stateList.length() << endl; - cerr << endl; - } -} - -void ParseData::generateXML( ostream &out ) -{ - beginProcessing(); - - /* Make the generator. */ - XMLCodeGen codeGen( sectionName, this, sectionGraph, out ); - - /* Write out with it. */ - codeGen.writeXML(); - - if ( printStatistics ) { - cerr << "fsm name : " << sectionName << endl; - cerr << "num states: " << sectionGraph->stateList.length() << endl; - cerr << endl; - } -} - + } +} + +/* Make an fsm key in int format (what the fsm graph uses) from an alphabet + * number returned by the parser. Validates that the number doesn't overflow + * the alphabet type. */ +Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd ) +{ + /* Switch on hex/decimal format. */ + if ( str[0] == '0' && str[1] == 'x' ) + return makeFsmKeyHex( str, loc, pd ); + else + return makeFsmKeyDec( str, loc, pd ); +} + +/* Make an fsm int format (what the fsm graph uses) from a single character. + * Performs proper conversion depending on signed/unsigned property of the + * alphabet. */ +Key makeFsmKeyChar( char c, ParseData *pd ) +{ + if ( keyOps->isSigned ) { + /* Copy from a char type. */ + return Key( c ); + } + else { + /* Copy from an unsigned byte type. */ + return Key( (unsigned char)c ); + } +} + +/* Make an fsm key array in int format (what the fsm graph uses) from a string + * of characters. Performs proper conversion depending on signed/unsigned + * property of the alphabet. */ +void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd ) +{ + if ( keyOps->isSigned ) { + /* Copy from a char star type. */ + char *src = data; + for ( int i = 0; i < len; i++ ) + result[i] = Key(src[i]); + } + else { + /* Copy from an unsigned byte ptr type. */ + unsigned char *src = (unsigned char*) data; + for ( int i = 0; i < len; i++ ) + result[i] = Key(src[i]); + } +} + +/* Like makeFsmKeyArray except the result has only unique keys. They ordering + * will be changed. */ +void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, + bool caseInsensitive, ParseData *pd ) +{ + /* Use a transitions list for getting unique keys. */ + if ( keyOps->isSigned ) { + /* Copy from a char star type. */ + char *src = data; + for ( int si = 0; si < len; si++ ) { + Key key( src[si] ); + result.insert( key ); + if ( caseInsensitive ) { + if ( key.isLower() ) + result.insert( key.toUpper() ); + else if ( key.isUpper() ) + result.insert( key.toLower() ); + } + } + } + else { + /* Copy from an unsigned byte ptr type. */ + unsigned char *src = (unsigned char*) data; + for ( int si = 0; si < len; si++ ) { + Key key( src[si] ); + result.insert( key ); + if ( caseInsensitive ) { + if ( key.isLower() ) + result.insert( key.toUpper() ); + else if ( key.isUpper() ) + result.insert( key.toLower() ); + } + } + } +} + +FsmAp *dotFsm( ParseData *pd ) +{ + FsmAp *retFsm = new FsmAp(); + retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey ); + return retFsm; +} + +FsmAp *dotStarFsm( ParseData *pd ) +{ + FsmAp *retFsm = new FsmAp(); + retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey ); + return retFsm; +} + +/* Make a builtin type. Depends on the signed nature of the alphabet type. */ +FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd ) +{ + /* FsmAp created to return. */ + FsmAp *retFsm = 0; + bool isSigned = keyOps->isSigned; + + switch ( builtin ) { + case BT_Any: { + /* All characters. */ + retFsm = dotFsm( pd ); + break; + } + case BT_Ascii: { + /* Ascii characters 0 to 127. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( 0, 127 ); + break; + } + case BT_Extend: { + /* Ascii extended characters. This is the full byte range. Dependent + * on signed, vs no signed. If the alphabet is one byte then just use + * dot fsm. */ + if ( isSigned ) { + retFsm = new FsmAp(); + retFsm->rangeFsm( -128, 127 ); + } + else { + retFsm = new FsmAp(); + retFsm->rangeFsm( 0, 255 ); + } + break; + } + case BT_Alpha: { + /* Alpha [A-Za-z]. */ + FsmAp *upper = new FsmAp(), *lower = new FsmAp(); + upper->rangeFsm( 'A', 'Z' ); + lower->rangeFsm( 'a', 'z' ); + upper->unionOp( lower ); + upper->minimizePartition2(); + retFsm = upper; + break; + } + case BT_Digit: { + /* Digits [0-9]. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( '0', '9' ); + break; + } + case BT_Alnum: { + /* Alpha numerics [0-9A-Za-z]. */ + FsmAp *digit = new FsmAp(), *lower = new FsmAp(); + FsmAp *upper = new FsmAp(); + digit->rangeFsm( '0', '9' ); + upper->rangeFsm( 'A', 'Z' ); + lower->rangeFsm( 'a', 'z' ); + digit->unionOp( upper ); + digit->unionOp( lower ); + digit->minimizePartition2(); + retFsm = digit; + break; + } + case BT_Lower: { + /* Lower case characters. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( 'a', 'z' ); + break; + } + case BT_Upper: { + /* Upper case characters. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( 'A', 'Z' ); + break; + } + case BT_Cntrl: { + /* Control characters. */ + FsmAp *cntrl = new FsmAp(); + FsmAp *highChar = new FsmAp(); + cntrl->rangeFsm( 0, 31 ); + highChar->concatFsm( 127 ); + cntrl->unionOp( highChar ); + cntrl->minimizePartition2(); + retFsm = cntrl; + break; + } + case BT_Graph: { + /* Graphical ascii characters [!-~]. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( '!', '~' ); + break; + } + case BT_Print: { + /* Printable characters. Same as graph except includes space. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( ' ', '~' ); + break; + } + case BT_Punct: { + /* Punctuation. */ + FsmAp *range1 = new FsmAp(); + FsmAp *range2 = new FsmAp(); + FsmAp *range3 = new FsmAp(); + FsmAp *range4 = new FsmAp(); + range1->rangeFsm( '!', '/' ); + range2->rangeFsm( ':', '@' ); + range3->rangeFsm( '[', '`' ); + range4->rangeFsm( '{', '~' ); + range1->unionOp( range2 ); + range1->unionOp( range3 ); + range1->unionOp( range4 ); + range1->minimizePartition2(); + retFsm = range1; + break; + } + case BT_Space: { + /* Whitespace: [\t\v\f\n\r ]. */ + FsmAp *cntrl = new FsmAp(); + FsmAp *space = new FsmAp(); + cntrl->rangeFsm( '\t', '\r' ); + space->concatFsm( ' ' ); + cntrl->unionOp( space ); + cntrl->minimizePartition2(); + retFsm = cntrl; + break; + } + case BT_Xdigit: { + /* Hex digits [0-9A-Fa-f]. */ + FsmAp *digit = new FsmAp(); + FsmAp *upper = new FsmAp(); + FsmAp *lower = new FsmAp(); + digit->rangeFsm( '0', '9' ); + upper->rangeFsm( 'A', 'F' ); + lower->rangeFsm( 'a', 'f' ); + digit->unionOp( upper ); + digit->unionOp( lower ); + digit->minimizePartition2(); + retFsm = digit; + break; + } + case BT_Lambda: { + retFsm = new FsmAp(); + retFsm->lambdaFsm(); + break; + } + case BT_Empty: { + retFsm = new FsmAp(); + retFsm->emptyFsm(); + break; + }} + + return retFsm; +} + +/* Check if this name inst or any name inst below is referenced. */ +bool NameInst::anyRefsRec() +{ + if ( numRefs > 0 ) + return true; + + /* Recurse on children until true. */ + for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) { + if ( (*ch)->anyRefsRec() ) + return true; + } + + return false; +} + +/* + * ParseData + */ + +/* Initialize the structure that will collect info during the parse of a + * machine. */ +ParseData::ParseData( const char *fileName, char *sectionName, + const InputLoc §ionLoc ) +: + sectionGraph(0), + generatingSectionSubset(false), + nextPriorKey(0), + /* 0 is reserved for global error actions. */ + nextLocalErrKey(1), + nextNameId(0), + nextCondId(0), + alphTypeSet(false), + getKeyExpr(0), + accessExpr(0), + prePushExpr(0), + postPopExpr(0), + pExpr(0), + peExpr(0), + eofExpr(0), + csExpr(0), + topExpr(0), + stackExpr(0), + actExpr(0), + tokstartExpr(0), + tokendExpr(0), + dataExpr(0), + lowerNum(0), + upperNum(0), + fileName(fileName), + sectionName(sectionName), + sectionLoc(sectionLoc), + curActionOrd(0), + curPriorOrd(0), + rootName(0), + exportsRootName(0), + nextEpsilonResolvedLink(0), + nextLongestMatchId(1), + lmRequiresErrorState(false), + cgd(0) +{ + /* Initialize the dictionary of graphs. This is our symbol table. The + * initialization needs to be done on construction which happens at the + * beginning of a machine spec so any assignment operators can reference + * the builtins. */ + initGraphDict(); +} + +/* Clean up the data collected during a parse. */ +ParseData::~ParseData() +{ + /* Delete all the nodes in the action list. Will cause all the + * string data that represents the actions to be deallocated. */ + actionList.empty(); +} + +/* Make a name id in the current name instantiation scope if it is not + * already there. */ +NameInst *ParseData::addNameInst( const InputLoc &loc, const char *data, bool isLabel ) +{ + /* Create the name instantitaion object and insert it. */ + NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel ); + curNameInst->childVect.append( newNameInst ); + if ( data != 0 ) + curNameInst->children.insertMulti( data, newNameInst ); + return newNameInst; +} + +void ParseData::initNameWalk() +{ + curNameInst = rootName; + curNameChild = 0; +} + +void ParseData::initExportsNameWalk() +{ + curNameInst = exportsRootName; + curNameChild = 0; +} + +/* Goes into the next child scope. The number of the child is already set up. + * We need this for the syncronous name tree and parse tree walk to work + * properly. It is reset on entry into a scope and advanced on poping of a + * scope. A call to enterNameScope should be accompanied by a corresponding + * popNameScope. */ +NameFrame ParseData::enterNameScope( bool isLocal, int numScopes ) +{ + /* Save off the current data. */ + NameFrame retFrame; + retFrame.prevNameInst = curNameInst; + retFrame.prevNameChild = curNameChild; + retFrame.prevLocalScope = localNameScope; + + /* Enter into the new name scope. */ + for ( int i = 0; i < numScopes; i++ ) { + curNameInst = curNameInst->childVect[curNameChild]; + curNameChild = 0; + } + + if ( isLocal ) + localNameScope = curNameInst; + + return retFrame; +} + +/* Return from a child scope to a parent. The parent info must be specified as + * an argument and is obtained from the corresponding call to enterNameScope. + * */ +void ParseData::popNameScope( const NameFrame &frame ) +{ + /* Pop the name scope. */ + curNameInst = frame.prevNameInst; + curNameChild = frame.prevNameChild+1; + localNameScope = frame.prevLocalScope; +} + +void ParseData::resetNameScope( const NameFrame &frame ) +{ + /* Pop the name scope. */ + curNameInst = frame.prevNameInst; + curNameChild = frame.prevNameChild; + localNameScope = frame.prevLocalScope; +} + + +void ParseData::unsetObsoleteEntries( FsmAp *graph ) +{ + /* Loop the reference names and increment the usage. Names that are no + * longer needed will be unset in graph. */ + for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) { + /* Get the name. */ + NameInst *name = *ref; + name->numUses += 1; + + /* If the name is no longer needed unset its corresponding entry. */ + if ( name->numUses == name->numRefs ) { + assert( graph->entryPoints.find( name->id ) != 0 ); + graph->unsetEntry( name->id ); + assert( graph->entryPoints.find( name->id ) == 0 ); + } + } +} + +NameSet ParseData::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly ) +{ + /* Queue needed for breadth-first search, load it with the start node. */ + NameInstList nameQueue; + nameQueue.append( refFrom ); + + NameSet result; + while ( nameQueue.length() > 0 ) { + /* Pull the next from location off the queue. */ + NameInst *from = nameQueue.detachFirst(); + + /* Look for the name. */ + NameMapEl *low, *high; + if ( from->children.findMulti( data, low, high ) ) { + /* Record all instances of the name. */ + for ( ; low <= high; low++ ) + result.insert( low->value ); + } + + /* Name not there, do breadth-first operation of appending all + * childrent to the processing queue. */ + for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) { + if ( !recLabelsOnly || (*name)->isLabel ) + nameQueue.append( *name ); + } + } + + /* Queue exhausted and name never found. */ + return result; +} + +void ParseData::resolveFrom( NameSet &result, NameInst *refFrom, + const NameRef &nameRef, int namePos ) +{ + /* Look for the name in the owning scope of the factor with aug. */ + NameSet partResult = resolvePart( refFrom, nameRef[namePos], false ); + + /* If there are more parts to the name then continue on. */ + if ( ++namePos < nameRef.length() ) { + /* There are more components to the name, search using all the part + * results as the base. */ + for ( NameSet::Iter name = partResult; name.lte(); name++ ) + resolveFrom( result, *name, nameRef, namePos ); + } + else { + /* This is the last component, append the part results to the final + * results. */ + result.insert( partResult ); + } +} + +/* Write out a name reference. */ +ostream &operator<<( ostream &out, const NameRef &nameRef ) +{ + int pos = 0; + if ( nameRef[pos] == 0 ) { + out << "::"; + pos += 1; + } + out << nameRef[pos++]; + for ( ; pos < nameRef.length(); pos++ ) + out << "::" << nameRef[pos]; + return out; +} + +ostream &operator<<( ostream &out, const NameInst &nameInst ) +{ + /* Count the number fully qualified name parts. */ + int numParents = 0; + NameInst *curParent = nameInst.parent; + while ( curParent != 0 ) { + numParents += 1; + curParent = curParent->parent; + } + + /* Make an array and fill it in. */ + curParent = nameInst.parent; + NameInst **parents = new NameInst*[numParents]; + for ( int p = numParents-1; p >= 0; p-- ) { + parents[p] = curParent; + curParent = curParent->parent; + } + + /* Write the parents out, skip the root. */ + for ( int p = 1; p < numParents; p++ ) + out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" ); + + /* Write the name and cleanup. */ + out << "::" << ( nameInst.name != 0 ? nameInst.name : "<ANON>" ); + delete[] parents; + return out; +} + +struct CmpNameInstLoc +{ + static int compare( const NameInst *ni1, const NameInst *ni2 ) + { + if ( ni1->loc.line < ni2->loc.line ) + return -1; + else if ( ni1->loc.line > ni2->loc.line ) + return 1; + else if ( ni1->loc.col < ni2->loc.col ) + return -1; + else if ( ni1->loc.col > ni2->loc.col ) + return 1; + return 0; + } +}; + +void errorStateLabels( const NameSet &resolved ) +{ + MergeSort<NameInst*, CmpNameInstLoc> mergeSort; + mergeSort.sort( resolved.data, resolved.length() ); + for ( NameSet::Iter res = resolved; res.lte(); res++ ) + error((*res)->loc) << " -> " << **res << endl; +} + + +NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action ) +{ + NameInst *nameInst = 0; + + /* Do the local search if the name is not strictly a root level name + * search. */ + if ( nameRef[0] != 0 ) { + /* If the action is referenced, resolve all of them. */ + if ( action != 0 && action->actionRefs.length() > 0 ) { + /* Look for the name in all referencing scopes. */ + NameSet resolved; + for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ ) + resolveFrom( resolved, *actRef, nameRef, 0 ); + + if ( resolved.length() > 0 ) { + /* Take the first one. */ + nameInst = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references. */ + error(loc) << "state reference " << nameRef << + " resolves to multiple entry points" << endl; + errorStateLabels( resolved ); + } + } + } + } + + /* If not found in the local scope, look in global. */ + if ( nameInst == 0 ) { + NameSet resolved; + int fromPos = nameRef[0] != 0 ? 0 : 1; + resolveFrom( resolved, rootName, nameRef, fromPos ); + + if ( resolved.length() > 0 ) { + /* Take the first. */ + nameInst = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references. */ + error(loc) << "state reference " << nameRef << + " resolves to multiple entry points" << endl; + errorStateLabels( resolved ); + } + } + } + + if ( nameInst == 0 ) { + /* If not found then complain. */ + error(loc) << "could not resolve state reference " << nameRef << endl; + } + return nameInst; +} + +void ParseData::resolveNameRefs( InlineList *inlineList, Action *action ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Entry: case InlineItem::Goto: + case InlineItem::Call: case InlineItem::Next: { + /* Resolve, pass action for local search. */ + NameInst *target = resolveStateRef( *item->nameRef, item->loc, action ); + + /* Name lookup error reporting is handled by resolveStateRef. */ + if ( target != 0 ) { + /* Check if the target goes into a longest match. */ + NameInst *search = target->parent; + while ( search != 0 ) { + if ( search->isLongestMatch ) { + error(item->loc) << "cannot enter inside a longest " + "match construction as an entry point" << endl; + break; + } + search = search->parent; + } + + /* Record the reference in the name. This will cause the + * entry point to survive to the end of the graph + * generating walk. */ + target->numRefs += 1; + } + + item->nameTarg = target; + break; + } + default: + break; + } + + /* Some of the item types may have children. */ + if ( item->children != 0 ) + resolveNameRefs( item->children, action ); + } +} + +/* Resolve references to labels in actions. */ +void ParseData::resolveActionNameRefs() +{ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Only care about the actions that are referenced. */ + if ( act->actionRefs.length() > 0 ) + resolveNameRefs( act->inlineList, act ); + } +} + +/* Walk a name tree starting at from and fill the name index. */ +void ParseData::fillNameIndex( NameInst *from ) +{ + /* Fill the value for from in the name index. */ + nameIndex[from->id] = from; + + /* Recurse on the implicit final state and then all children. */ + if ( from->final != 0 ) + fillNameIndex( from->final ); + for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) + fillNameIndex( *name ); +} + +void ParseData::makeRootNames() +{ + /* Create the root name. */ + rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false ); + exportsRootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false ); +} + +/* Build the name tree and supporting data structures. */ +void ParseData::makeNameTree( GraphDictEl *dictEl ) +{ + /* Set up curNameInst for the walk. */ + initNameWalk(); + + if ( dictEl != 0 ) { + /* A start location has been specified. */ + dictEl->value->makeNameTree( dictEl->loc, this ); + } + else { + /* First make the name tree. */ + for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) { + /* Recurse on the instance. */ + glel->value->makeNameTree( glel->loc, this ); + } + } + + /* The number of nodes in the tree can now be given by nextNameId */ + nameIndex = new NameInst*[nextNameId]; + memset( nameIndex, 0, sizeof(NameInst*)*nextNameId ); + fillNameIndex( rootName ); + fillNameIndex( exportsRootName ); +} + + +void ParseData::createBuiltin( const char *name, BuiltinMachine builtin ) +{ + Expression *expression = new Expression( builtin ); + Join *join = new Join( expression ); + MachineDef *machineDef = new MachineDef( join ); + VarDef *varDef = new VarDef( name, machineDef ); + GraphDictEl *graphDictEl = new GraphDictEl( name, varDef ); + graphDict.insert( graphDictEl ); +} + +/* Initialize the graph dict with builtin types. */ +void ParseData::initGraphDict( ) +{ + createBuiltin( "any", BT_Any ); + createBuiltin( "ascii", BT_Ascii ); + createBuiltin( "extend", BT_Extend ); + createBuiltin( "alpha", BT_Alpha ); + createBuiltin( "digit", BT_Digit ); + createBuiltin( "alnum", BT_Alnum ); + createBuiltin( "lower", BT_Lower ); + createBuiltin( "upper", BT_Upper ); + createBuiltin( "cntrl", BT_Cntrl ); + createBuiltin( "graph", BT_Graph ); + createBuiltin( "print", BT_Print ); + createBuiltin( "punct", BT_Punct ); + createBuiltin( "space", BT_Space ); + createBuiltin( "xdigit", BT_Xdigit ); + createBuiltin( "null", BT_Lambda ); + createBuiltin( "zlen", BT_Lambda ); + createBuiltin( "empty", BT_Empty ); +} + +/* Set the alphabet type. If the types are not valid returns false. */ +bool ParseData::setAlphType( const InputLoc &loc, char *s1, char *s2 ) +{ + alphTypeLoc = loc; + userAlphType = findAlphType( s1, s2 ); + alphTypeSet = true; + return userAlphType != 0; +} + +/* Set the alphabet type. If the types are not valid returns false. */ +bool ParseData::setAlphType( const InputLoc &loc, char *s1 ) +{ + alphTypeLoc = loc; + userAlphType = findAlphType( s1 ); + alphTypeSet = true; + return userAlphType != 0; +} + +bool ParseData::setVariable( char *var, InlineList *inlineList ) +{ + bool set = true; + + if ( strcmp( var, "p" ) == 0 ) + pExpr = inlineList; + else if ( strcmp( var, "pe" ) == 0 ) + peExpr = inlineList; + else if ( strcmp( var, "eof" ) == 0 ) + eofExpr = inlineList; + else if ( strcmp( var, "cs" ) == 0 ) + csExpr = inlineList; + else if ( strcmp( var, "data" ) == 0 ) + dataExpr = inlineList; + else if ( strcmp( var, "top" ) == 0 ) + topExpr = inlineList; + else if ( strcmp( var, "stack" ) == 0 ) + stackExpr = inlineList; + else if ( strcmp( var, "act" ) == 0 ) + actExpr = inlineList; + else if ( strcmp( var, "ts" ) == 0 ) + tokstartExpr = inlineList; + else if ( strcmp( var, "te" ) == 0 ) + tokendExpr = inlineList; + else + set = false; + + return set; +} + +/* Initialize the key operators object that will be referenced by all fsms + * created. */ +void ParseData::initKeyOps( ) +{ + /* Signedness and bounds. */ + HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType; + thisKeyOps.setAlphType( alphType ); + + if ( lowerNum != 0 ) { + /* If ranges are given then interpret the alphabet type. */ + thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this ); + thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this ); + } + + thisCondData.lastCondKey = thisKeyOps.maxKey; +} + +void ParseData::printNameInst( NameInst *nameInst, int level ) +{ + for ( int i = 0; i < level; i++ ) + cerr << " "; + cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") << + " id: " << nameInst->id << + " refs: " << nameInst->numRefs << + " uses: " << nameInst->numUses << endl; + for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ ) + printNameInst( *name, level+1 ); +} + +/* Remove duplicates of unique actions from an action table. */ +void ParseData::removeDups( ActionTable &table ) +{ + /* Scan through the table looking for unique actions to + * remove duplicates of. */ + for ( int i = 0; i < table.length(); i++ ) { + /* Remove any duplicates ahead of i. */ + for ( int r = i+1; r < table.length(); ) { + if ( table[r].value == table[i].value ) + table.vremove(r); + else + r += 1; + } + } +} + +/* Remove duplicates from action lists. This operates only on transition and + * eof action lists and so should be called once all actions have been + * transfered to their final resting place. */ +void ParseData::removeActionDups( FsmAp *graph ) +{ + /* Loop all states. */ + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { + /* Loop all transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) + removeDups( trans->actionTable ); + removeDups( state->toStateActionTable ); + removeDups( state->fromStateActionTable ); + removeDups( state->eofActionTable ); + } +} + +Action *ParseData::newAction( const char *name, InlineList *inlineList ) +{ + InputLoc loc; + loc.line = 1; + loc.col = 1; + loc.fileName = "NONE"; + + Action *action = new Action( loc, name, inlineList, nextCondId++ ); + action->actionRefs.append( rootName ); + actionList.append( action ); + return action; +} + +void ParseData::initLongestMatchData() +{ + if ( lmList.length() > 0 ) { + /* The initTokStart action resets the token start. */ + InlineList *il1 = new InlineList; + il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) ); + initTokStart = newAction( "initts", il1 ); + initTokStart->isLmAction = true; + + /* The initActId action gives act a default value. */ + InlineList *il4 = new InlineList; + il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) ); + initActId = newAction( "initact", il4 ); + initActId->isLmAction = true; + + /* The setTokStart action sets tokstart. */ + InlineList *il5 = new InlineList; + il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) ); + setTokStart = newAction( "ts", il5 ); + setTokStart->isLmAction = true; + + /* The setTokEnd action sets tokend. */ + InlineList *il3 = new InlineList; + il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) ); + setTokEnd = newAction( "te", il3 ); + setTokEnd->isLmAction = true; + + /* The action will also need an ordering: ahead of all user action + * embeddings. */ + initTokStartOrd = curActionOrd++; + initActIdOrd = curActionOrd++; + setTokStartOrd = curActionOrd++; + setTokEndOrd = curActionOrd++; + } +} + +/* After building the graph, do some extra processing to ensure the runtime + * data of the longest mactch operators is consistent. */ +void ParseData::setLongestMatchData( FsmAp *graph ) +{ + if ( lmList.length() > 0 ) { + /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry) + * init the tokstart. */ + for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) { + /* This is run after duplicates are removed, we must guard against + * inserting a duplicate. */ + ActionTable &actionTable = en->value->toStateActionTable; + if ( ! actionTable.hasAction( initTokStart ) ) + actionTable.setAction( initTokStartOrd, initTokStart ); + } + + /* Find the set of states that are the target of transitions with + * actions that have calls. These states will be targeted by fret + * statements. */ + StateSet states; + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) { + if ( ati->value->anyCall && trans->toState != 0 ) + states.insert( trans->toState ); + } + } + } + + + /* Init tokstart upon entering the above collected states. */ + for ( StateSet::Iter ps = states; ps.lte(); ps++ ) { + /* This is run after duplicates are removed, we must guard against + * inserting a duplicate. */ + ActionTable &actionTable = (*ps)->toStateActionTable; + if ( ! actionTable.hasAction( initTokStart ) ) + actionTable.setAction( initTokStartOrd, initTokStart ); + } + } +} + +/* Make the graph from a graph dict node. Does minimization and state sorting. */ +FsmAp *ParseData::makeInstance( GraphDictEl *gdNode ) +{ + /* Build the graph from a walk of the parse tree. */ + FsmAp *graph = gdNode->value->walk( this ); + + /* Resolve any labels that point to multiple states. Any labels that are + * still around are referenced only by gotos and calls and they need to be + * made into deterministic entry points. */ + graph->deterministicEntry(); + + /* + * All state construction is now complete. + */ + + /* Transfer actions from the out action tables to eof action tables. */ + for ( StateSet::Iter state = graph->finStateSet; state.lte(); state++ ) + graph->transferOutActions( *state ); + + /* Transfer global error actions. */ + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) + graph->transferErrorActions( state, 0 ); + + if ( ::wantDupsRemoved ) + removeActionDups( graph ); + + /* Remove unreachable states. There should be no dead end states. The + * subtract and intersection operators are the only places where they may + * be created and those operators clean them up. */ + graph->removeUnreachableStates(); + + /* No more fsm operations are to be done. Action ordering numbers are + * no longer of use and will just hinder minimization. Clear them. */ + graph->nullActionKeys(); + + /* Transition priorities are no longer of use. We can clear them + * because they will just hinder minimization as well. Clear them. */ + graph->clearAllPriorities(); + + if ( minimizeOpt != MinimizeNone ) { + /* Minimize here even if we minimized at every op. Now that function + * keys have been cleared we may get a more minimal fsm. */ + switch ( minimizeLevel ) { + case MinimizeApprox: + graph->minimizeApproximate(); + break; + case MinimizeStable: + graph->minimizeStable(); + break; + case MinimizePartition1: + graph->minimizePartition1(); + break; + case MinimizePartition2: + graph->minimizePartition2(); + break; + } + } + + graph->compressTransitions(); + + return graph; +} + +void ParseData::printNameTree() +{ + /* Print the name instance map. */ + for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ ) + printNameInst( *name, 0 ); + + cerr << "name index:" << endl; + /* Show that the name index is correct. */ + for ( int ni = 0; ni < nextNameId; ni++ ) { + cerr << ni << ": "; + const char *name = nameIndex[ni]->name; + cerr << ( name != 0 ? name : "<ANON>" ) << endl; + } +} + +FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode ) +{ + /* Build the name tree and supporting data structures. */ + makeNameTree( gdNode ); + + /* Resove name references from gdNode. */ + initNameWalk(); + gdNode->value->resolveNameRefs( this ); + + /* Do not resolve action references. Since we are not building the entire + * graph there's a good chance that many name references will fail. This + * is okay since generating part of the graph is usually only done when + * inspecting the compiled machine. */ + + /* Same story for extern entry point references. */ + + /* Flag this case so that the XML code generator is aware that we haven't + * looked up name references in actions. It can then avoid segfaulting. */ + generatingSectionSubset = true; + + /* Just building the specified graph. */ + initNameWalk(); + FsmAp *mainGraph = makeInstance( gdNode ); + + return mainGraph; +} + +FsmAp *ParseData::makeAll() +{ + /* Build the name tree and supporting data structures. */ + makeNameTree( 0 ); + + /* Resove name references in the tree. */ + initNameWalk(); + for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) + glel->value->resolveNameRefs( this ); + + /* Resolve action code name references. */ + resolveActionNameRefs(); + + /* Force name references to the top level instantiations. */ + for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ ) + (*inst)->numRefs += 1; + + FsmAp *mainGraph = 0; + FsmAp **graphs = new FsmAp*[instanceList.length()]; + int numOthers = 0; + + /* Make all the instantiations, we know that main exists in this list. */ + initNameWalk(); + for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) { + if ( strcmp( glel->key, mainMachine ) == 0 ) { + /* Main graph is always instantiated. */ + mainGraph = makeInstance( glel ); + } + else { + /* Instantiate and store in others array. */ + graphs[numOthers++] = makeInstance( glel ); + } + } + + if ( mainGraph == 0 ) + mainGraph = graphs[--numOthers]; + + if ( numOthers > 0 ) { + /* Add all the other graphs into main. */ + mainGraph->globOp( graphs, numOthers ); + } + + delete[] graphs; + return mainGraph; +} + +void ParseData::analyzeAction( Action *action, InlineList *inlineList ) +{ + /* FIXME: Actions used as conditions should be very constrained. */ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr ) + action->anyCall = true; + + /* Need to recurse into longest match items. */ + if ( item->type == InlineItem::LmSwitch ) { + LongestMatch *lm = item->longestMatch; + for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) { + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + } + + if ( item->type == InlineItem::LmOnLast || + item->type == InlineItem::LmOnNext || + item->type == InlineItem::LmOnLagBehind ) + { + LongestMatchPart *lmi = item->longestMatchPart; + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + + if ( item->children != 0 ) + analyzeAction( action, item->children ); + } +} + + +/* Check actions for bad uses of fsm directives. We don't go inside longest + * match items in actions created by ragel, since we just want the user + * actions. */ +void ParseData::checkInlineList( Action *act, InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* EOF checks. */ + if ( act->numEofRefs > 0 ) { + switch ( item->type ) { + /* Currently no checks. */ + default: + break; + } + } + + /* Recurse. */ + if ( item->children != 0 ) + checkInlineList( act, item->children ); + } +} + +void ParseData::checkAction( Action *action ) +{ + /* Check for actions with calls that are embedded within a longest match + * machine. */ + if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) { + for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) { + NameInst *check = *ar; + while ( check != 0 ) { + if ( check->isLongestMatch ) { + error(action->loc) << "within a scanner, fcall is permitted" + " only in pattern actions" << endl; + break; + } + check = check->parent; + } + } + } + + checkInlineList( action, action->inlineList ); +} + + +void ParseData::analyzeGraph( FsmAp *graph ) +{ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) + analyzeAction( act, act->inlineList ); + + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + /* The transition list. */ + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ ) + at->value->numTransRefs += 1; + } + + for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ ) + at->value->numToStateRefs += 1; + + for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ ) + at->value->numFromStateRefs += 1; + + for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ ) + at->value->numEofRefs += 1; + + for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) { + for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ ) + (*sci)->numCondRefs += 1; + } + } + + /* Checks for bad usage of directives in action code. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) + checkAction( act ); +} + +void ParseData::makeExportsNameTree() +{ + /* Make a name tree for the exports. */ + initExportsNameWalk(); + + /* First make the name tree. */ + for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) { + if ( gdel->value->isExport ) { + /* Recurse on the instance. */ + gdel->value->makeNameTree( gdel->loc, this ); + } + } +} + +void ParseData::makeExports() +{ + makeExportsNameTree(); + + /* Resove name references in the tree. */ + initExportsNameWalk(); + for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) { + if ( gdel->value->isExport ) + gdel->value->resolveNameRefs( this ); + } + + /* Make all the instantiations, we know that main exists in this list. */ + initExportsNameWalk(); + for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) { + /* Check if this var def is an export. */ + if ( gdel->value->isExport ) { + /* Build the graph from a walk of the parse tree. */ + FsmAp *graph = gdel->value->walk( this ); + + /* Build the graph from a walk of the parse tree. */ + if ( !graph->checkSingleCharMachine() ) { + error(gdel->loc) << "bad export machine, must define " + "a single character" << endl; + } + else { + /* Safe to extract the key and declare the export. */ + Key exportKey = graph->startState->outList.head->lowKey; + exportList.append( new Export( gdel->value->name, exportKey ) ); + } + } + } + +} + +/* Construct the machine and catch failures which can occur during + * construction. */ +void ParseData::prepareMachineGen( GraphDictEl *graphDictEl ) +{ + try { + /* This machine construction can fail. */ + prepareMachineGenTBWrapped( graphDictEl ); + } + catch ( FsmConstructFail fail ) { + switch ( fail.reason ) { + case FsmConstructFail::CondNoKeySpace: { + InputLoc &loc = alphTypeSet ? alphTypeLoc : sectionLoc; + error(loc) << "sorry, no more characters are " + "available in the alphabet space" << endl; + error(loc) << " for conditions, please use a " + "smaller alphtype or reduce" << endl; + error(loc) << " the span of characters on which " + "conditions are embedded" << endl; + break; + } + } + } +} + +void ParseData::prepareMachineGenTBWrapped( GraphDictEl *graphDictEl ) +{ + beginProcessing(); + initKeyOps(); + makeRootNames(); + initLongestMatchData(); + + /* Make the graph, do minimization. */ + if ( graphDictEl == 0 ) + sectionGraph = makeAll(); + else + sectionGraph = makeSpecific( graphDictEl ); + + /* Compute exports from the export definitions. */ + makeExports(); + + /* If any errors have occured in the input file then don't write anything. */ + if ( gblErrorCount > 0 ) + return; + + analyzeGraph( sectionGraph ); + + /* Depends on the graph analysis. */ + setLongestMatchData( sectionGraph ); + + /* Decide if an error state is necessary. + * 1. There is an error transition + * 2. There is a gap in the transitions + * 3. The longest match operator requires it. */ + if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() ) + sectionGraph->errState = sectionGraph->addState(); + + /* State numbers need to be assigned such that all final states have a + * larger state id number than all non-final states. This enables the + * first_final mechanism to function correctly. We also want states to be + * ordered in a predictable fashion. So we first apply a depth-first + * search, then do a stable sort by final state status, then assign + * numbers. */ + + sectionGraph->depthFirstOrdering(); + sectionGraph->sortStatesByFinal(); + sectionGraph->setStateNumbers( 0 ); +} + +void ParseData::generateReduced( InputData &inputData ) +{ + beginProcessing(); + + cgd = makeCodeGen( inputData.inputFileName, sectionName, *inputData.outStream ); + + /* Make the generator. */ + BackendGen backendGen( sectionName, this, sectionGraph, cgd ); + + /* Write out with it. */ + backendGen.makeBackend(); + + if ( printStatistics ) { + cerr << "fsm name : " << sectionName << endl; + cerr << "num states: " << sectionGraph->stateList.length() << endl; + cerr << endl; + } +} + +void ParseData::generateXML( ostream &out ) +{ + beginProcessing(); + + /* Make the generator. */ + XMLCodeGen codeGen( sectionName, this, sectionGraph, out ); + + /* Write out with it. */ + codeGen.writeXML(); + + if ( printStatistics ) { + cerr << "fsm name : " << sectionName << endl; + cerr << "num states: " << sectionGraph->stateList.length() << endl; + cerr << endl; + } +} + |