/*
 *  Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
 */

/*  This file is part of Ragel.
 *
 *  Ragel is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 * 
 *  Ragel is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with Ragel; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 */

#ifndef _PARSETREE_H
#define _PARSETREE_H

#include "ragel.h"
#include "avlmap.h"
#include "bstmap.h"
#include "vector.h"
#include "dlist.h"

struct NameInst;

/* Types of builtin machines. */
enum BuiltinMachine
{
	BT_Any,
	BT_Ascii,
	BT_Extend,
	BT_Alpha,
	BT_Digit,
	BT_Alnum,
	BT_Lower,
	BT_Upper,
	BT_Cntrl,
	BT_Graph,
	BT_Print,
	BT_Punct,
	BT_Space,
	BT_Xdigit,
	BT_Lambda,
	BT_Empty
};


struct ParseData;

/* Leaf type. */
struct Literal;

/* Tree nodes. */

struct Term;
struct FactorWithAug;
struct FactorWithRep;
struct FactorWithNeg;
struct Factor;
struct Expression;
struct Join;
struct MachineDef;
struct LongestMatch;
struct LongestMatchPart;
struct LmPartList;
struct Range;
struct LengthDef;

/* Type of augmentation. Describes locations in the machine. */
enum AugType
{
	/* Transition actions/priorities. */
	at_start,
	at_all,
	at_finish,
	at_leave,

	/* Global error actions. */
	at_start_gbl_error,
	at_all_gbl_error,
	at_final_gbl_error,
	at_not_start_gbl_error,
	at_not_final_gbl_error,
	at_middle_gbl_error,

	/* Local error actions. */
	at_start_local_error,
	at_all_local_error,
	at_final_local_error,
	at_not_start_local_error,
	at_not_final_local_error,
	at_middle_local_error,
	
	/* To State Action embedding. */
	at_start_to_state,
	at_all_to_state,
	at_final_to_state,
	at_not_start_to_state,
	at_not_final_to_state,
	at_middle_to_state,

	/* From State Action embedding. */
	at_start_from_state,
	at_all_from_state,
	at_final_from_state,
	at_not_start_from_state,
	at_not_final_from_state,
	at_middle_from_state,

	/* EOF Action embedding. */
	at_start_eof,
	at_all_eof,
	at_final_eof,
	at_not_start_eof,
	at_not_final_eof,
	at_middle_eof
};

/* IMPORTANT: These must follow the same order as the state augs in AugType
 * since we will be using this to compose AugType. */
enum StateAugType
{
	sat_start = 0,
	sat_all,
	sat_final,
	sat_not_start,
	sat_not_final,
	sat_middle
};

struct Action;
struct PriorDesc;
struct RegExpr;
struct ReItem;
struct ReOrBlock;
struct ReOrItem;
struct ExplicitMachine;
struct InlineItem;
struct InlineList;

/* Reference to a named state. */
typedef Vector<char*> NameRef;
typedef Vector<NameRef*> NameRefList;
typedef Vector<NameInst*> NameTargList;

/* Structure for storing location of epsilon transitons. */
struct EpsilonLink
{
	EpsilonLink( const InputLoc &loc, NameRef &target )
		: loc(loc), target(target) { }

	InputLoc loc;
	NameRef target;
};

struct Label
{
	Label( const InputLoc &loc, char *data )
		: loc(loc), data(data) { }

	InputLoc loc;
	char *data;
};

/* Structrue represents an action assigned to some FactorWithAug node. The
 * factor with aug will keep an array of these. */
struct ParserAction
{
	ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
		: loc(loc), type(type), localErrKey(localErrKey), action(action) { }

	InputLoc loc;
	AugType type;
	int localErrKey;
	Action *action;
};

struct ConditionTest
{
	ConditionTest( const InputLoc &loc, AugType type, Action *action, bool sense ) : 
		loc(loc), type(type), action(action), sense(sense) { }

	InputLoc loc;
	AugType type;
	Action *action;
	bool sense;
};

struct Token
{
	char *data;
	int length;
	InputLoc loc;

	void append( const Token &other );
	void set( const char *str, int len );
};

char *prepareLitString( const InputLoc &loc, const char *src, long length, 
			long &resLen, bool &caseInsensitive );

/* Store the value and type of a priority augmentation. */
struct PriorityAug
{
	PriorityAug( AugType type, int priorKey, int priorValue ) :
		type(type), priorKey(priorKey), priorValue(priorValue) { }

	AugType type;
	int priorKey;
	int priorValue;
};

/*
 * A Variable Definition
 */
struct VarDef
{
	VarDef( const char *name, MachineDef *machineDef )
		: name(name), machineDef(machineDef), isExport(false) { }
	
	/* Parse tree traversal. */
	FsmAp *walk( ParseData *pd );
	void makeNameTree( const InputLoc &loc, ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	const char *name;
	MachineDef *machineDef;
	bool isExport;
};


/*
 * LongestMatch
 *
 * Wherever possible the item match will execute on the character. If not
 * possible the item match will execute on a lookahead character and either
 * hold the current char (if one away) or backup.
 *
 * How to handle the problem of backing up over a buffer break?
 * 
 * Don't want to use pending out transitions for embedding item match because
 * the role of item match action is different: it may sometimes match on the
 * final transition, or may match on a lookahead character.
 *
 * Don't want to invent a new operator just for this. So just trail action
 * after machine, this means we can only use literal actions.
 *
 * The item action may 
 *
 * What states of the machine will be final. The item actions that wrap around
 * on the last character will go straight to the start state.
 *
 * Some transitions will be lookahead transitions, they will hold the current
 * character. Crossing them with regular transitions must be restricted
 * because it does not make sense. The transition cannot simultaneously hold
 * and consume the current character.
 */
struct LongestMatchPart
{
	LongestMatchPart( Join *join, Action *action, 
			InputLoc &semiLoc, int longestMatchId )
	: 
		join(join), action(action), semiLoc(semiLoc), 
		longestMatchId(longestMatchId), inLmSelect(false) { }

	InputLoc getLoc();
	
	Join *join;
	Action *action;
	InputLoc semiLoc;

	Action *setActId;
	Action *actOnLast;
	Action *actOnNext;
	Action *actLagBehind;
	int longestMatchId;
	bool inLmSelect;
	LongestMatch *longestMatch;

	LongestMatchPart *prev, *next;
};

/* Declare a new type so that ptreetypes.h need not include dlist.h. */
struct LmPartList : DList<LongestMatchPart> {};

struct LongestMatch
{
	/* Construct with a list of joins */
	LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) : 
		loc(loc), longestMatchList(longestMatchList), name(0), 
		lmSwitchHandlesError(false) { }

	/* Tree traversal. */
	FsmAp *walk( ParseData *pd );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );
	void transferScannerLeavingActions( FsmAp *graph );
	void runLongestMatch( ParseData *pd, FsmAp *graph );
	Action *newAction( ParseData *pd, const InputLoc &loc, const char *name, 
			InlineList *inlineList );
	void makeActions( ParseData *pd );
	void findName( ParseData *pd );
	void restart( FsmAp *graph, TransAp *trans );

	InputLoc loc;
	LmPartList *longestMatchList;
	const char *name;

	Action *lmActSelect;
	bool lmSwitchHandlesError;

	LongestMatch *next, *prev;
};


/* List of Expressions. */
typedef DList<Expression> ExprList;

struct MachineDef
{
	enum Type {
		JoinType,
		LongestMatchType,
		LengthDefType
	};

	MachineDef( Join *join )
		: join(join), longestMatch(0), lengthDef(0), type(JoinType) {}
	MachineDef( LongestMatch *longestMatch )
		: join(0), longestMatch(longestMatch), lengthDef(0), type(LongestMatchType) {}
	MachineDef( LengthDef *lengthDef )
		: join(0), longestMatch(0), lengthDef(lengthDef), type(LengthDefType) {}

	FsmAp *walk( ParseData *pd );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );
	
	Join *join;
	LongestMatch *longestMatch;
	LengthDef *lengthDef;
	Type type;
};

/*
 * Join
 */
struct Join
{
	/* Construct with the first expression. */
	Join( Expression *expr );
	Join( const InputLoc &loc, Expression *expr );

	/* Tree traversal. */
	FsmAp *walk( ParseData *pd );
	FsmAp *walkJoin( ParseData *pd );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	/* Data. */
	InputLoc loc;
	ExprList exprList;
};

/*
 * Expression
 */
struct Expression
{
	enum Type { 
		OrType,
		IntersectType, 
		SubtractType, 
		StrongSubtractType,
		TermType, 
		BuiltinType
	};

	/* Construct with an expression on the left and a term on the right. */
	Expression( Expression *expression, Term *term, Type type ) : 
		expression(expression), term(term), 
		type(type), prev(this), next(this) { }

	/* Construct with only a term. */
	Expression( Term *term ) : 
		expression(0), term(term),
		type(TermType) , prev(this), next(this) { }
	
	/* Construct with a builtin type. */
	Expression( BuiltinMachine builtin ) : 
		expression(0), term(0), builtin(builtin), 
		type(BuiltinType), prev(this), next(this) { }

	~Expression();

	/* Tree traversal. */
	FsmAp *walk( ParseData *pd, bool lastInSeq = true );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	/* Node data. */
	Expression *expression;
	Term *term;
	BuiltinMachine builtin;
	Type type;

	Expression *prev, *next;
};

/*
 * Term
 */
struct Term 
{
	enum Type { 
		ConcatType, 
		RightStartType,
		RightFinishType,
		LeftType,
		FactorWithAugType
	};

	Term( Term *term, FactorWithAug *factorWithAug ) :
		term(term), factorWithAug(factorWithAug), type(ConcatType) { }

	Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
		term(term), factorWithAug(factorWithAug), type(type) { }

	Term( FactorWithAug *factorWithAug ) :
		term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
	
	~Term();

	FsmAp *walk( ParseData *pd, bool lastInSeq = true );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	Term *term;
	FactorWithAug *factorWithAug;
	Type type;

	/* Priority descriptor for RightFinish type. */
	PriorDesc priorDescs[2];
};


/* Third level of precedence. Augmenting nodes with actions and priorities. */
struct FactorWithAug
{
	FactorWithAug( FactorWithRep *factorWithRep ) :
		priorDescs(0), factorWithRep(factorWithRep) { }
	~FactorWithAug();

	/* Tree traversal. */
	FsmAp *walk( ParseData *pd );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
	void assignPriorities( FsmAp *graph, int *priorOrd );

	void assignConditions( FsmAp *graph );

	/* Actions and priorities assigned to the factor node. */
	Vector<ParserAction> actions;
	Vector<PriorityAug> priorityAugs;
	PriorDesc *priorDescs;
	Vector<Label> labels;
	Vector<EpsilonLink> epsilonLinks;
	Vector<ConditionTest> conditions;

	FactorWithRep *factorWithRep;
};

/* Fourth level of precedence. Trailing unary operators. Provide kleen star,
 * optional and plus. */
struct FactorWithRep
{
	enum Type { 
		StarType,
		StarStarType,
		OptionalType,
		PlusType, 
		ExactType,
		MaxType,
		MinType,
		RangeType,
		FactorWithNegType
	};

	 FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep, 
			int lowerRep, int upperRep, Type type ) :
		loc(loc), factorWithRep(factorWithRep), 
		factorWithNeg(0), lowerRep(lowerRep), 
		upperRep(upperRep), type(type) { }
	
	FactorWithRep( FactorWithNeg *factorWithNeg )
		: factorWithNeg(factorWithNeg), type(FactorWithNegType) { }

	~FactorWithRep();

	/* Tree traversal. */
	FsmAp *walk( ParseData *pd );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	InputLoc loc;
	FactorWithRep *factorWithRep;
	FactorWithNeg *factorWithNeg;
	int lowerRep, upperRep;
	Type type;

	/* Priority descriptor for StarStar type. */
	PriorDesc priorDescs[2];
};

/* Fifth level of precedence. Provides Negation. */
struct FactorWithNeg
{
	enum Type { 
		NegateType, 
		CharNegateType,
		FactorType
	};

	FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
		loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }

	FactorWithNeg( Factor *factor ) :
		factorWithNeg(0), factor(factor), type(FactorType) { }

	~FactorWithNeg();

	/* Tree traversal. */
	FsmAp *walk( ParseData *pd );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	InputLoc loc;
	FactorWithNeg *factorWithNeg;
	Factor *factor;
	Type type;
};

/*
 * Factor
 */
struct Factor
{
	/* Language elements a factor node can be. */
	enum Type {
		LiteralType, 
		RangeType, 
		OrExprType,
		RegExprType, 
		ReferenceType,
		ParenType,
		LongestMatchType,
	}; 

	/* Construct with a literal fsm. */
	Factor( Literal *literal ) :
		literal(literal), type(LiteralType) { }

	/* Construct with a range. */
	Factor( Range *range ) : 
		range(range), type(RangeType) { }
	
	/* Construct with the or part of a regular expression. */
	Factor( ReItem *reItem ) :
		reItem(reItem), type(OrExprType) { }

	/* Construct with a regular expression. */
	Factor( RegExpr *regExpr ) :
		regExpr(regExpr), type(RegExprType) { }

	/* Construct with a reference to a var def. */
	Factor( const InputLoc &loc, VarDef *varDef ) :
		loc(loc), varDef(varDef), type(ReferenceType) {}

	/* Construct with a parenthesized join. */
	Factor( Join *join ) :
		join(join), type(ParenType) {}
	
	/* Construct with a longest match operator. */
	Factor( LongestMatch *longestMatch ) :
		longestMatch(longestMatch), type(LongestMatchType) {}

	/* Cleanup. */
	~Factor();

	/* Tree traversal. */
	FsmAp *walk( ParseData *pd );
	void makeNameTree( ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	InputLoc loc;
	Literal *literal;
	Range *range;
	ReItem *reItem;
	RegExpr *regExpr;
	VarDef *varDef;
	Join *join;
	LongestMatch *longestMatch;
	int lower, upper;
	Type type;
};

/* A range machine. Only ever composed of two literals. */
struct Range
{
	Range( Literal *lowerLit, Literal *upperLit ) 
		: lowerLit(lowerLit), upperLit(upperLit) { }

	~Range();
	FsmAp *walk( ParseData *pd );

	Literal *lowerLit;
	Literal *upperLit;
};

/* Some literal machine. Can be a number or literal string. */
struct Literal
{
	enum LiteralType { Number, LitString };

	Literal( const Token &token, LiteralType type )
		: token(token), type(type) { }

	FsmAp *walk( ParseData *pd );
	
	Token token;
	LiteralType type;
};

/* Regular expression. */
struct RegExpr
{
	enum RegExpType { RecurseItem, Empty };

	/* Constructors. */
	RegExpr() : 
		type(Empty), caseInsensitive(false) { }
	RegExpr(RegExpr *regExpr, ReItem *item) : 
		regExpr(regExpr), item(item), 
		type(RecurseItem), caseInsensitive(false) { }

	~RegExpr();
	FsmAp *walk( ParseData *pd, RegExpr *rootRegex );

	RegExpr *regExpr;
	ReItem *item;
	RegExpType type;
	bool caseInsensitive;
};

/* An item in a regular expression. */
struct ReItem
{
	enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
	
	ReItem( const InputLoc &loc, const Token &token ) 
		: loc(loc), token(token), star(false), type(Data) { }
	ReItem( const InputLoc &loc, ReItemType type )
		: loc(loc), star(false), type(type) { }
	ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
		: loc(loc), orBlock(orBlock), star(false), type(type) { }

	~ReItem();
	FsmAp *walk( ParseData *pd, RegExpr *rootRegex );

	InputLoc loc;
	Token token;
	ReOrBlock *orBlock;
	bool star;
	ReItemType type;
};

/* An or block item. */
struct ReOrBlock
{
	enum ReOrBlockType { RecurseItem, Empty };

	/* Constructors. */
	ReOrBlock()
		: type(Empty) { }
	ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
		: orBlock(orBlock), item(item), type(RecurseItem) { }

	~ReOrBlock();
	FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
	
	ReOrBlock *orBlock;
	ReOrItem *item;
	ReOrBlockType type;
};

/* An item in an or block. */
struct ReOrItem
{
	enum ReOrItemType { Data, Range };

	ReOrItem( const InputLoc &loc, const Token &token ) 
		: loc(loc), token(token), type(Data) {}
	ReOrItem( const InputLoc &loc, char lower, char upper )
		: loc(loc), lower(lower), upper(upper), type(Range) { }

	FsmAp *walk( ParseData *pd, RegExpr *rootRegex );

	InputLoc loc;
	Token token;
	char lower;
	char upper;
	ReOrItemType type;
};


/*
 * Inline code tree
 */
struct InlineList;
struct InlineItem
{
	enum Type 
	{
		Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
		Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
		LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
		LmInitTokStart, LmSetTokStart, Break
	};

	InlineItem( const InputLoc &loc, char *data, Type type ) : 
		loc(loc), data(data), nameRef(0), children(0), type(type) { }

	InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) : 
		loc(loc), data(0), nameRef(nameRef), children(0), type(type) { }

	InlineItem( const InputLoc &loc, LongestMatch *longestMatch, 
		LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0),
		nameRef(0), children(0), longestMatch(longestMatch),
		longestMatchPart(longestMatchPart), type(type) { } 

	InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) : 
		loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
		type(type) { }

	InlineItem( const InputLoc &loc, Type type ) : 
		loc(loc), data(0), nameRef(0), children(0), type(type) { }
	
	InputLoc loc;
	char *data;
	NameRef *nameRef;
	NameInst *nameTarg;
	InlineList *children;
	LongestMatch *longestMatch;
	LongestMatchPart *longestMatchPart;
	Type type;

	InlineItem *prev, *next;
};

/* Normally this would be atypedef, but that would entail including DList from
 * ptreetypes, which should be just typedef forwards. */
struct InlineList : public DList<InlineItem> { };

#endif