diff options
author | vvvv <vvvv@ydb.tech> | 2023-07-31 18:21:04 +0300 |
---|---|---|
committer | vvvv <vvvv@ydb.tech> | 2023-07-31 18:21:04 +0300 |
commit | dec41c40e51aa407edef81a3c566a5a15780fc49 (patch) | |
tree | 4f197b596b32f35eca368121f0dff913419da9af | |
parent | 3ca8b54c96e09eb2b65be7f09675623438d559c7 (diff) | |
download | ydb-dec41c40e51aa407edef81a3c566a5a15780fc49.tar.gz |
YQL-16239 Move purecalc to public
698 files changed, 82714 insertions, 0 deletions
diff --git a/CMakeLists.darwin-x86_64.txt b/CMakeLists.darwin-x86_64.txt index 075d57df50..a68b571b8b 100644 --- a/CMakeLists.darwin-x86_64.txt +++ b/CMakeLists.darwin-x86_64.txt @@ -13,3 +13,9 @@ add_subdirectory(util) add_subdirectory(yt) add_subdirectory(certs) add_subdirectory(ydb) +add_subdirectory(yql) +add_subdirectory(kernel) +add_subdirectory(yweb) +add_subdirectory(mapreduce) +add_subdirectory(zora) +add_subdirectory(geobase) diff --git a/CMakeLists.linux-aarch64.txt b/CMakeLists.linux-aarch64.txt index 075d57df50..a68b571b8b 100644 --- a/CMakeLists.linux-aarch64.txt +++ b/CMakeLists.linux-aarch64.txt @@ -13,3 +13,9 @@ add_subdirectory(util) add_subdirectory(yt) add_subdirectory(certs) add_subdirectory(ydb) +add_subdirectory(yql) +add_subdirectory(kernel) +add_subdirectory(yweb) +add_subdirectory(mapreduce) +add_subdirectory(zora) +add_subdirectory(geobase) diff --git a/CMakeLists.linux-x86_64.txt b/CMakeLists.linux-x86_64.txt index 075d57df50..a68b571b8b 100644 --- a/CMakeLists.linux-x86_64.txt +++ b/CMakeLists.linux-x86_64.txt @@ -13,3 +13,9 @@ add_subdirectory(util) add_subdirectory(yt) add_subdirectory(certs) add_subdirectory(ydb) +add_subdirectory(yql) +add_subdirectory(kernel) +add_subdirectory(yweb) +add_subdirectory(mapreduce) +add_subdirectory(zora) +add_subdirectory(geobase) diff --git a/CMakeLists.windows-x86_64.txt b/CMakeLists.windows-x86_64.txt index e8667d4e27..b4dc0d7487 100644 --- a/CMakeLists.windows-x86_64.txt +++ b/CMakeLists.windows-x86_64.txt @@ -13,3 +13,9 @@ add_subdirectory(library) add_subdirectory(yt) add_subdirectory(certs) add_subdirectory(ydb) +add_subdirectory(yql) +add_subdirectory(kernel) +add_subdirectory(yweb) +add_subdirectory(mapreduce) +add_subdirectory(zora) +add_subdirectory(geobase) diff --git a/contrib/libs/libc_compat/include/link/link.h b/contrib/libs/libc_compat/include/link/link.h new file mode 100644 index 0000000000..7352c26166 --- /dev/null +++ b/contrib/libs/libc_compat/include/link/link.h @@ -0,0 +1,15 @@ 
+#pragma once + +#ifdef _MSC_VER + +#ifdef _cplusplus +extern "C" { +#endif + +int link(const char *oldpath, const char *newpath); + +#ifdef _cplusplus +} +#endif + +#endif diff --git a/contrib/tools/ragel5/common/buffer.h b/contrib/tools/ragel5/common/buffer.h new file mode 100644 index 0000000000..99c4e82d49 --- /dev/null +++ b/contrib/tools/ragel5/common/buffer.h @@ -0,0 +1,55 @@ +/* + * Copyright 2003 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _BUFFER_H +#define _BUFFER_H + +#define BUFFER_INITIAL_SIZE 4096 + +/* An automatically grown buffer for collecting tokens. Always reuses space; + * never down resizes. 
*/ +struct Buffer +{ + Buffer() + { + data = (char*) malloc( BUFFER_INITIAL_SIZE ); + allocated = BUFFER_INITIAL_SIZE; + length = 0; + } + ~Buffer() { free(data); } + + void append( char p ) + { + if ( length == allocated ) { + allocated *= 2; + data = (char*) realloc( data, allocated ); + } + data[length++] = p; + } + + void clear() { length = 0; } + + char *data; + int allocated; + int length; +}; + +#endif /* _BUFFER_H */ diff --git a/contrib/tools/ragel5/common/common.cpp b/contrib/tools/ragel5/common/common.cpp new file mode 100644 index 0000000000..4484dcbd73 --- /dev/null +++ b/contrib/tools/ragel5/common/common.cpp @@ -0,0 +1,296 @@ +/* + * Copyright 2006-2007 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "pcheck.h" +#include "common.h" +#include <string.h> +#include <assert.h> + +#ifdef _WIN32
+#include <malloc.h>
+#else
+#include <alloca.h>
+#endif + +HostType hostTypesC[] = +{ + { "char", 0, true, CHAR_MIN, CHAR_MAX, sizeof(char) }, + { "unsigned", "char", false, 0, UCHAR_MAX, sizeof(unsigned char) }, + { "short", 0, true, SHRT_MIN, SHRT_MAX, sizeof(short) }, + { "unsigned", "short", false, 0, USHRT_MAX, sizeof(unsigned short) }, + { "int", 0, true, INT_MIN, INT_MAX, sizeof(int) }, + { "unsigned", "int", false, 0, UINT_MAX, sizeof(unsigned int) }, + { "long", 0, true, LONG_MIN, LONG_MAX, sizeof(long) }, + { "unsigned", "long", false, 0, (long long)ULONG_MAX, sizeof(unsigned long) } +}; + +HostType hostTypesD[] = +{ + { "byte", 0, true, CHAR_MIN, CHAR_MAX, 1 }, + { "ubyte", 0, false, 0, UCHAR_MAX, 1 }, + { "char", 0, false, 0, UCHAR_MAX, 1 }, + { "short", 0, true, SHRT_MIN, SHRT_MAX, 2 }, + { "ushort", 0, false, 0, USHRT_MAX, 2 }, + { "wchar", 0, false, 0, USHRT_MAX, 2 }, + { "int", 0, true, INT_MIN, INT_MAX, 4 }, + { "uint", 0, false, 0, UINT_MAX, 4 }, + { "dchar", 0, false, 0, UINT_MAX, 4 } +}; + +HostType hostTypesJava[] = +{ + { "byte", 0, true, CHAR_MIN, CHAR_MAX, 1 }, + { "short", 0, true, SHRT_MIN, SHRT_MAX, 2 }, + { "char", 0, false, 0, USHRT_MAX, 2 }, + { "int", 0, true, INT_MIN, INT_MAX, 4 }, +}; + +HostType hostTypesRuby[] = +{ + { "byte", 0, true, CHAR_MIN, CHAR_MAX, 1 }, + { "short", 0, true, SHRT_MIN, SHRT_MAX, 2 }, + { "char", 0, false, 0, USHRT_MAX, 2 }, + { "int", 0, true, INT_MIN, INT_MAX, 4 }, +}; + +HostLang hostLangC = { hostTypesC, 8, hostTypesC+0, true }; +HostLang hostLangD = { hostTypesD, 9, hostTypesD+2, true }; +HostLang hostLangJava = { hostTypesJava, 4, hostTypesJava+2, false }; +HostLang hostLangRuby = { hostTypesRuby, 4, hostTypesRuby+2, false }; + +HostLang *hostLang = &hostLangC; +HostLangType hostLangType = CCode; + +/* Construct a new parameter checker with for paramSpec. 
*/ +ParamCheck::ParamCheck(const char *paramSpec, int argc, char **argv) +: + state(noparam), + argOffset(0), + curArg(0), + iCurArg(1), + paramSpec(paramSpec), + argc(argc), + argv(argv) +{ +} + +/* Check a single option. Returns the index of the next parameter. Sets p to + * the arg character if valid, 0 otherwise. Sets parg to the parameter arg if + * there is one, NULL otherwise. */ +bool ParamCheck::check() +{ + bool requiresParam; + + if ( iCurArg >= argc ) { /* Off the end of the arg list. */ + state = noparam; + return false; + } + + if ( argOffset != 0 && *argOffset == 0 ) { + /* We are at the end of an arg string. */ + iCurArg += 1; + if ( iCurArg >= argc ) { + state = noparam; + return false; + } + argOffset = 0; + } + + if ( argOffset == 0 ) { + /* Set the current arg. */ + curArg = argv[iCurArg]; + + /* We are at the beginning of an arg string. */ + if ( argv[iCurArg] == 0 || /* Argv[iCurArg] is null. */ + argv[iCurArg][0] != '-' || /* Not a param. */ + argv[iCurArg][1] == 0 ) { /* Only a dash. */ + parameter = 0; + parameterArg = 0; + + iCurArg += 1; + state = noparam; + return true; + } + argOffset = argv[iCurArg] + 1; + } + + /* Get the arg char. */ + char argChar = *argOffset; + + /* Loop over all the parms and look for a match. */ + const char *pSpec = paramSpec; + while ( *pSpec != 0 ) { + char pSpecChar = *pSpec; + + /* If there is a ':' following the char then + * it requires a parm. If a parm is required + * then move ahead two in the parmspec. Otherwise + * move ahead one in the parm spec. */ + if ( pSpec[1] == ':' ) { + requiresParam = true; + pSpec += 2; + } + else { + requiresParam = false; + pSpec += 1; + } + + /* Do we have a match. */ + if ( argChar == pSpecChar ) { + if ( requiresParam ) { + if ( argOffset[1] == 0 ) { + /* The param must follow. */ + if ( iCurArg + 1 == argc ) { + /* We are the last arg so there + * cannot be a parameter to it. 
*/ + parameter = argChar; + parameterArg = 0; + iCurArg += 1; + argOffset = 0; + state = invalid; + return true; + } + else { + /* the parameter to the arg is the next arg. */ + parameter = pSpecChar; + parameterArg = argv[iCurArg + 1]; + iCurArg += 2; + argOffset = 0; + state = match; + return true; + } + } + else { + /* The param for the arg is built in. */ + parameter = pSpecChar; + parameterArg = argOffset + 1; + iCurArg += 1; + argOffset = 0; + state = match; + return true; + } + } + else { + /* Good, we matched the parm and no + * arg is required. */ + parameter = pSpecChar; + parameterArg = 0; + argOffset += 1; + state = match; + return true; + } + } + } + + /* We did not find a match. Bad Argument. */ + parameter = argChar; + parameterArg = 0; + argOffset += 1; + state = invalid; + return true; +} + +void NormalizeWinPath(char* input) { + const size_t len = strlen(input); + char* res = static_cast<char*>(alloca(len + 1)); + for (size_t i = 0, j = 0; i <= len; ++i, ++j) { + if (input[i] == '\\') { + res[j] = '/'; + if (i < len - 2 && input[i + 1] == '\\') + ++i; + } else { + res[j] = input[i]; + } + } + strcpy(input, res); +} + +/* Counts newlines before sending sync. */ +int output_filter::sync( ) +{ + line += 1; + return std::filebuf::sync(); +} + +/* Counts newlines before sending data out to file. */ +std::streamsize output_filter::xsputn( const char *s, std::streamsize n ) +{ + for ( int i = 0; i < n; i++ ) { + if ( s[i] == '\n' ) + line += 1; + } + return std::filebuf::xsputn( s, n ); +} + +/* Scans a string looking for the file extension. If there is a file + * extension then pointer returned points to inside the string + * passed in. Otherwise returns null. */ +char *findFileExtension( char *stemFile ) +{ + char *ppos = stemFile + strlen(stemFile) - 1; + + /* Scan backwards from the end looking for the first dot. + * If we encounter a '/' before the first dot, then stop the scan. 
*/ + while ( 1 ) { + /* If we found a dot or got to the beginning of the string then + * we are done. */ + if ( ppos == stemFile || *ppos == '.' ) + break; + + /* If we hit a / then there is no extension. Done. */ + if ( *ppos == '/' ) { + ppos = stemFile; + break; + } + ppos--; + } + + /* If we got to the front of the string then bail we + * did not find an extension */ + if ( ppos == stemFile ) + ppos = 0; + + return ppos; +} + +/* Make a file name from a stem. Removes the old filename suffix and + * replaces it with a new one. Returns a newed up string. */ +char *fileNameFromStem( char *stemFile, const char *suffix ) +{ + int len = strlen( stemFile ); + assert( len > 0 ); + + /* Get the extension. */ + char *ppos = findFileExtension( stemFile ); + + /* If an extension was found, then shorten what we think the len is. */ + if ( ppos != 0 ) + len = ppos - stemFile; + + /* Make the return string from the stem and the suffix. */ + char *retVal = new char[ len + strlen( suffix ) + 1 ]; + strncpy( retVal, stemFile, len ); + strcpy( retVal + len, suffix ); + + return retVal; +} + + diff --git a/contrib/tools/ragel5/common/common.h b/contrib/tools/ragel5/common/common.h new file mode 100644 index 0000000000..aae6f85add --- /dev/null +++ b/contrib/tools/ragel5/common/common.h @@ -0,0 +1,308 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _COMMON_H +#define _COMMON_H + +#include <fstream> +#include <climits> +#include "dlist.h" + +typedef unsigned long long Size; + +struct Key +{ +private: + long key; + +public: + friend inline Key operator+(const Key key1, const Key key2); + friend inline Key operator-(const Key key1, const Key key2); + friend inline Key operator/(const Key key1, const Key key2); + friend inline long operator&(const Key key1, const Key key2); + + friend inline bool operator<( const Key key1, const Key key2 ); + friend inline bool operator<=( const Key key1, const Key key2 ); + friend inline bool operator>( const Key key1, const Key key2 ); + friend inline bool operator>=( const Key key1, const Key key2 ); + friend inline bool operator==( const Key key1, const Key key2 ); + friend inline bool operator!=( const Key key1, const Key key2 ); + + friend struct KeyOps; + + Key( ) {} + Key( const Key &key ) : key(key.key) {} + Key( long key ) : key(key) {} + + /* Returns the value used to represent the key. This value must be + * interpreted based on signedness. */ + long getVal() const { return key; }; + + /* Returns the key casted to a long long. This form of the key does not + * require and signedness interpretation. */ + long long getLongLong() const; + + bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); } + bool isLower() const { return ( 'a' <= key && key <= 'z' ); } + bool isPrintable() const + { + return ( 7 <= key && key <= 13 ) || ( 32 <= key && key < 127 ); + } + + Key toUpper() const + { return Key( 'A' + ( key - 'a' ) ); } + Key toLower() const + { return Key( 'a' + ( key - 'A' ) ); } + + void operator+=( const Key other ) + { + /* FIXME: must be made aware of isSigned. 
*/ + key += other.key; + } + + void operator-=( const Key other ) + { + /* FIXME: must be made aware of isSigned. */ + key -= other.key; + } + + void operator|=( const Key other ) + { + /* FIXME: must be made aware of isSigned. */ + key |= other.key; + } + + /* Decrement. Needed only for ranges. */ + inline void decrement(); + inline void increment(); +}; + +struct HostType +{ + const char *data1; + const char *data2; + bool isSigned; + long long minVal; + long long maxVal; + unsigned int size; +}; + +struct HostLang +{ + HostType *hostTypes; + int numHostTypes; + HostType *defaultAlphType; + bool explicitUnsigned; +}; + + +/* Target language. */ +enum HostLangType +{ + CCode, + DCode, + JavaCode, + RubyCode +}; + +extern HostLang *hostLang; +extern HostLangType hostLangType; + +extern HostLang hostLangC; +extern HostLang hostLangD; +extern HostLang hostLangJava; +extern HostLang hostLangRuby; + +/* An abstraction of the key operators that manages key operations such as + * comparison and increment according the signedness of the key. */ +struct KeyOps +{ + /* Default to signed alphabet. */ + KeyOps() : + isSigned(true), + alphType(0) + {} + + /* Default to signed alphabet. */ + KeyOps( bool isSigned ) + :isSigned(isSigned) {} + + bool isSigned; + Key minKey, maxKey; + HostType *alphType; + + void setAlphType( HostType *alphType ) + { + this->alphType = alphType; + isSigned = alphType->isSigned; + if ( isSigned ) { + minKey = (long) alphType->minVal; + maxKey = (long) alphType->maxVal; + } + else { + minKey = (long) (unsigned long) alphType->minVal; + maxKey = (long) (unsigned long) alphType->maxVal; + } + } + + /* Compute the distance between two keys. */ + Size span( Key key1, Key key2 ) + { + return isSigned ? 
+ (unsigned long long)( + (long long)key2.key - + (long long)key1.key + 1) : + (unsigned long long)( + (unsigned long)key2.key) - + (unsigned long long)((unsigned long)key1.key) + 1; + } + + Size alphSize() + { return span( minKey, maxKey ); } + + HostType *typeSubsumes( long long maxVal ) + { + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( maxVal <= hostLang->hostTypes[i].maxVal ) + return hostLang->hostTypes + i; + } + return 0; + } + + HostType *typeSubsumes( bool isSigned, long long maxVal ) + { + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( ( isSigned == hostLang->hostTypes[i].isSigned ) && + maxVal <= hostLang->hostTypes[i].maxVal ) + return hostLang->hostTypes + i; + } + return 0; + } +}; + +extern KeyOps *keyOps; + +inline bool operator<( const Key key1, const Key key2 ) +{ + return keyOps->isSigned ? key1.key < key2.key : + (unsigned long)key1.key < (unsigned long)key2.key; +} + +inline bool operator<=( const Key key1, const Key key2 ) +{ + return keyOps->isSigned ? key1.key <= key2.key : + (unsigned long)key1.key <= (unsigned long)key2.key; +} + +inline bool operator>( const Key key1, const Key key2 ) +{ + return keyOps->isSigned ? key1.key > key2.key : + (unsigned long)key1.key > (unsigned long)key2.key; +} + +inline bool operator>=( const Key key1, const Key key2 ) +{ + return keyOps->isSigned ? key1.key >= key2.key : + (unsigned long)key1.key >= (unsigned long)key2.key; +} + +inline bool operator==( const Key key1, const Key key2 ) +{ + return key1.key == key2.key; +} + +inline bool operator!=( const Key key1, const Key key2 ) +{ + return key1.key != key2.key; +} + +/* Decrement. Needed only for ranges. */ +inline void Key::decrement() +{ + key = keyOps->isSigned ? key - 1 : ((unsigned long)key)-1; +} + +/* Increment. Needed only for ranges. */ +inline void Key::increment() +{ + key = keyOps->isSigned ? key+1 : ((unsigned long)key)+1; +} + +inline long long Key::getLongLong() const +{ + return keyOps->isSigned ? 
(long long)key : (long long)(unsigned long)key; +} + +inline Key operator+(const Key key1, const Key key2) +{ + /* FIXME: must be made aware of isSigned. */ + return Key( key1.key + key2.key ); +} + +inline Key operator-(const Key key1, const Key key2) +{ + /* FIXME: must be made aware of isSigned. */ + return Key( key1.key - key2.key ); +} + +inline long operator&(const Key key1, const Key key2) +{ + /* FIXME: must be made aware of isSigned. */ + return key1.key & key2.key; +} + +inline Key operator/(const Key key1, const Key key2) +{ + /* FIXME: must be made aware of isSigned. */ + return key1.key / key2.key; +} + +/* Filter on the output stream that keeps track of the number of lines + * output. */ +class output_filter : public std::filebuf +{ +public: + output_filter( char *fileName ) : fileName(fileName), line(1) { } + + virtual int sync(); + virtual std::streamsize xsputn(const char* s, std::streamsize n); + + char *fileName; + int line; +}; + +char *findFileExtension( char *stemFile ); +char *fileNameFromStem( char *stemFile, const char *suffix ); + +struct Export +{ + Export(const char *name, Key key ) + : name(name), key(key) {} + + const char *name; + Key key; + + Export *prev, *next; +}; + +typedef DList<Export> ExportList; + +#endif /* _COMMON_H */ diff --git a/contrib/tools/ragel5/common/config.h b/contrib/tools/ragel5/common/config.h new file mode 100644 index 0000000000..405cfd6c3b --- /dev/null +++ b/contrib/tools/ragel5/common/config.h @@ -0,0 +1,39 @@ +/* common/config.h. Generated by configure. */ +/* + * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _CONFIG_H +#define _CONFIG_H + +/* Programs. */ +/* #undef GDC */ +#define GOBJC gcc -x objective-c +#define CXX c++ +#define CC cc +/* #undef JAVAC */ +/* #undef TXL */ +/* #undef RUBY */ + +#ifdef WIN32 +#define strcasecmp _stricmp +#endif + +#endif /* _CONFIG_H */ diff --git a/contrib/tools/ragel5/common/pcheck.h b/contrib/tools/ragel5/common/pcheck.h new file mode 100644 index 0000000000..5f95dc3c12 --- /dev/null +++ b/contrib/tools/ragel5/common/pcheck.h @@ -0,0 +1,51 @@ +/* + * Copyright 2001, 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _PCHECK_H +#define _PCHECK_H + +class ParamCheck +{ +public: + ParamCheck(const char *paramSpec, int argc, char **argv); + + bool check(); + + char *parameterArg; /* The argument to the parameter. 
*/ + char parameter; /* The parameter matched. */ + enum { match, invalid, noparam } state; + + char *argOffset; /* If we are reading params inside an + * arg this points to the offset. */ + + char *curArg; /* Pointer to the current arg. */ + int iCurArg; /* Index to the current arg. */ + +private: + const char *paramSpec; /* Parameter spec supplied by the coder. */ + int argc; /* Arguement data from the command line. */ + char **argv; + +}; + +void NormalizeWinPath(char* input); + +#endif /* _PCHECK_H */ diff --git a/contrib/tools/ragel5/common/version.h b/contrib/tools/ragel5/common/version.h new file mode 100644 index 0000000000..dba4eb2154 --- /dev/null +++ b/contrib/tools/ragel5/common/version.h @@ -0,0 +1,2 @@ +#define VERSION "5.19" +#define PUBDATE "March 2007" diff --git a/contrib/tools/ragel5/common/ya.make b/contrib/tools/ragel5/common/ya.make new file mode 100644 index 0000000000..7448cd2af3 --- /dev/null +++ b/contrib/tools/ragel5/common/ya.make @@ -0,0 +1,20 @@ +LIBRARY() + +LICENSE(GPL-2.0-or-later) + +NO_UTIL() +NO_COMPILER_WARNINGS() + +ADDINCL( + GLOBAL contrib/tools/ragel5/common +) + +PEERDIR( + contrib/tools/ragel5/aapl +) + +SRCS( + common.cpp +) + +END() diff --git a/contrib/tools/ragel5/ragel/fsmap.cpp b/contrib/tools/ragel5/ragel/fsmap.cpp new file mode 100644 index 0000000000..551aea0391 --- /dev/null +++ b/contrib/tools/ragel5/ragel/fsmap.cpp @@ -0,0 +1,840 @@ +/* + * Copyright 2002-2004 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "fsmgraph.h" +#include <iostream> +using std::cerr; +using std::endl; + +CondData *condData = 0; +KeyOps *keyOps = 0; + +/* Insert an action into an action table. */ +void ActionTable::setAction( int ordering, Action *action ) +{ + /* Multi-insert in case specific instances of an action appear in a + * transition more than once. */ + insertMulti( ordering, action ); +} + +/* Set all the action from another action table in this table. */ +void ActionTable::setActions( const ActionTable &other ) +{ + for ( ActionTable::Iter action = other; action.lte(); action++ ) + insertMulti( action->key, action->value ); +} + +void ActionTable::setActions( int *orderings, Action **actions, int nActs ) +{ + for ( int a = 0; a < nActs; a++ ) + insertMulti( orderings[a], actions[a] ); +} + +bool ActionTable::hasAction( Action *action ) +{ + for ( int a = 0; a < length(); a++ ) { + if ( data[a].value == action ) + return true; + } + return false; +} + +/* Insert an action into an action table. */ +void LmActionTable::setAction( int ordering, LongestMatchPart *action ) +{ + /* Multi-insert in case specific instances of an action appear in a + * transition more than once. */ + insertMulti( ordering, action ); +} + +/* Set all the action from another action table in this table. 
*/ +void LmActionTable::setActions( const LmActionTable &other ) +{ + for ( LmActionTable::Iter action = other; action.lte(); action++ ) + insertMulti( action->key, action->value ); +} + +void ErrActionTable::setAction( int ordering, Action *action, int transferPoint ) +{ + insertMulti( ErrActionTableEl( action, ordering, transferPoint ) ); +} + +void ErrActionTable::setActions( const ErrActionTable &other ) +{ + for ( ErrActionTable::Iter act = other; act.lte(); act++ ) + insertMulti( ErrActionTableEl( act->action, act->ordering, act->transferPoint ) ); +} + +/* Insert a priority into this priority table. Looks out for priorities on + * duplicate keys. */ +void PriorTable::setPrior( int ordering, PriorDesc *desc ) +{ + PriorEl *lastHit = 0; + PriorEl *insed = insert( PriorEl(ordering, desc), &lastHit ); + if ( insed == 0 ) { + /* This already has a priority on the same key as desc. Overwrite the + * priority if the ordering is larger (later in time). */ + if ( ordering >= lastHit->ordering ) + *lastHit = PriorEl( ordering, desc ); + } +} + +/* Set all the priorities from a priorTable in this table. */ +void PriorTable::setPriors( const PriorTable &other ) +{ + /* Loop src priorities once to overwrite duplicates. */ + PriorTable::Iter priorIt = other; + for ( ; priorIt.lte(); priorIt++ ) + setPrior( priorIt->ordering, priorIt->desc ); +} + +/* Set the priority of starting transitions. Isolates the start state so it has + * no other entry points, then sets the priorities of all the transitions out + * of the start state. If the start state is final, then the outPrior of the + * start state is also set. The idea is that a machine that accepts the null + * string can still specify the starting trans prior for when it accepts the + * null word. */ +void FsmAp::startFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + + /* Walk all transitions out of the start state. 
*/ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->priorTable.setPrior( ordering, prior ); + } +} + +/* Set the priority of all transitions in a graph. Walks all transition lists + * and all def transitions. */ +void FsmAp::allTransPrior( int ordering, PriorDesc *prior ) +{ + /* Walk the list of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out list of the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->priorTable.setPrior( ordering, prior ); + } + } +} + +/* Set the priority of all transitions that go into a final state. Note that if + * any entry states are final, we will not be setting the priority of any + * transitions that may go into those states in the future. The graph does not + * support pending in transitions in the same way pending out transitions are + * supported. */ +void FsmAp::finishFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk all in transitions of the final state. */ + for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) + trans->priorTable.setPrior( ordering, prior ); + } +} + +/* Set the priority of any future out transitions that may be made going out of + * this state machine. */ +void FsmAp::leaveFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Set priority in all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outPriorTable.setPrior( ordering, prior ); +} + + +/* Set actions to execute on starting transitions. Isolates the start state + * so it has no other entry points, then adds to the transition functions + * of all the transitions out of the start state. If the start state is final, + * then the func is also added to the start state's out func list. 
The idea is + * that a machine that accepts the null string can execute a start func when it + * matches the null word, which can only be done when leaving the start/final + * state. */ +void FsmAp::startFsmAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + + /* Walk the start state's transitions, setting functions. */ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->actionTable.setAction( ordering, action ); + } +} + +/* Set functions to execute on all transitions. Walks the out lists of all + * states. */ +void FsmAp::allTransAction( int ordering, Action *action ) +{ + /* Walk all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out list of the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->actionTable.setAction( ordering, action ); + } + } +} + +/* Specify functions to execute upon entering final states. If the start state + * is final we can't really specify a function to execute upon entering that + * final state the first time. So function really means whenever entering a + * final state from within the same fsm. */ +void FsmAp::finishFsmAction( int ordering, Action *action ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk the final state's in list. */ + for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) + trans->actionTable.setAction( ordering, action ); + } +} + +/* Add functions to any future out transitions that may be made going out of + * this state machine. */ +void FsmAp::leaveFsmAction( int ordering, Action *action ) +{ + /* Insert the action in the outActionTable of all final states. 
*/ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outActionTable.setAction( ordering, action ); +} + +/* Add functions to the longest match action table for constructing scanners. */ +void FsmAp::longMatchAction( int ordering, LongestMatchPart *lmPart ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk the final state's in list. */ + for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) + trans->lmActionTable.setAction( ordering, lmPart ); + } +} + +void FsmAp::fillGaps( StateAp *state ) +{ + if ( state->outList.length() == 0 ) { + /* Add the range on the lower and upper bound. */ + attachNewTrans( state, 0, keyOps->minKey, keyOps->maxKey ); + } + else { + TransList srcList; + srcList.transfer( state->outList ); + + /* Check for a gap at the beginning. */ + TransList::Iter trans = srcList, next; + if ( keyOps->minKey < trans->lowKey ) { + /* Make the high key and append. */ + Key highKey = trans->lowKey; + highKey.decrement(); + + attachNewTrans( state, 0, keyOps->minKey, highKey ); + } + + /* Write the transition. */ + next = trans.next(); + state->outList.append( trans ); + + /* Keep the last high end. */ + Key lastHigh = trans->highKey; + + /* Loop each source range. */ + for ( trans = next; trans.lte(); trans = next ) { + /* Make the next key following the last range. */ + Key nextKey = lastHigh; + nextKey.increment(); + + /* Check for a gap from last up to here. */ + if ( nextKey < trans->lowKey ) { + /* Make the high end of the range that fills the gap. */ + Key highKey = trans->lowKey; + highKey.decrement(); + + attachNewTrans( state, 0, nextKey, highKey ); + } + + /* Reduce the transition. If it reduced to anything then add it. */ + next = trans.next(); + state->outList.append( trans ); + + /* Keep the last high end. */ + lastHigh = trans->highKey; + } + + /* Now check for a gap on the end to fill. 
*/ + if ( lastHigh < keyOps->maxKey ) { + /* The filler range starts just past the last high key. */ + lastHigh.increment(); + + attachNewTrans( state, 0, lastHigh, keyOps->maxKey ); + } + } +} +/* Set an action on all transitions of state that go to error, filling gaps first. */ +void FsmAp::setErrorAction( StateAp *state, int ordering, Action *action ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Set the action in the transitions that go to error. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState == 0 ) + trans->actionTable.setAction( ordering, action ); + } +} + + +/* Give a target state for error transitions. The given orderings and actions + * are set on every transition that gets redirected. */ +void FsmAp::setErrorTarget( StateAp *state, StateAp *target, int *orderings, + Action **actions, int nActs ) +{ + /* Fill any gaps in the out list with an error transition. */ + fillGaps( state ); + + /* Set error target in the transitions that go to error. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState == 0 ) { + /* The trans goes to error, redirect it. */ + redirectErrorTrans( trans->fromState, target, trans ); + trans->actionTable.setActions( orderings, actions, nActs ); + } + } +} +/* Move the error actions of state that are tagged with transferPoint onto its error transitions, removing them from the error action table. */ +void FsmAp::transferErrorActions( StateAp *state, int transferPoint ) +{ + for ( int i = 0; i < state->errActionTable.length(); ) { + ErrActionTableEl *act = state->errActionTable.data + i; + if ( act->transferPoint == transferPoint ) { + /* Transfer the error action and remove it. The index is not advanced + * because the removal changes what sits at position i. */ + setErrorAction( state, act->ordering, act->action ); + state->errActionTable.vremove( i ); + } + else { + /* Not transferring and deleting, skip over the item. */ + i += 1; + } + } +} + +/* Set error actions in the start state. */ +void FsmAp::startErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + + /* Add the actions. 
*/ + startState->errActionTable.setAction( ordering, action, transferPoint ); +} + +/* Set error actions in all states. */ +void FsmAp::allErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Insert actions in the error action table of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->errActionTable.setAction( ordering, action, transferPoint ); +} + +/* Set error actions in final states. */ +void FsmAp::finalErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Add the action to the error table of final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->errActionTable.setAction( ordering, action, transferPoint ); +} +/* Set error actions in all states except the start state. */ +void FsmAp::notStartErrorAction( int ordering, Action *action, int transferPoint ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} +/* Set error actions in all states that are not final. */ +void FsmAp::notFinalErrorAction( int ordering, Action *action, int transferPoint ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +/* Set error actions in states that are not final and not the start state. */ +void FsmAp::middleErrorAction( int ordering, Action *action, int transferPoint ) +{ + /* Set the action in all states that are not the start state and not final. + * The start state is only skipped, it is not isolated by this function. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->errActionTable.setAction( ordering, action, transferPoint ); + } +} + +/* Set EOF actions in the start state. */ +void FsmAp::startEOFAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. 
*/ + isolateStartState(); + + /* Add the actions. */ + startState->eofActionTable.setAction( ordering, action ); +} + +/* Set EOF actions in all states. */ +void FsmAp::allEOFAction( int ordering, Action *action ) +{ + /* Insert actions in the EOF action table of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->eofActionTable.setAction( ordering, action ); +} + +/* Set EOF actions in final states. */ +void FsmAp::finalEOFAction( int ordering, Action *action ) +{ + /* Add the action to the EOF action table of final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->eofActionTable.setAction( ordering, action ); +} +/* Set EOF actions in all states except the start state. */ +void FsmAp::notStartEOFAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->eofActionTable.setAction( ordering, action ); + } +} +/* Set EOF actions in all states that are not final. */ +void FsmAp::notFinalEOFAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->eofActionTable.setAction( ordering, action ); + } +} + +/* Set EOF actions in states that are not final and not the start state. */ +void FsmAp::middleEOFAction( int ordering, Action *action ) +{ + /* Set the actions in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->eofActionTable.setAction( ordering, action ); + } +} + +/* + * Set To State Actions. + */ + +/* Set to state actions in the start state. */ +void FsmAp::startToStateAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + startState->toStateActionTable.setAction( ordering, action ); +} + +/* Set to state actions in all states. 
*/ +void FsmAp::allToStateAction( int ordering, Action *action ) +{ + /* Insert the action on all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->toStateActionTable.setAction( ordering, action ); +} + +/* Set to state actions in final states. */ +void FsmAp::finalToStateAction( int ordering, Action *action ) +{ + /* Add the action to the to state action table of final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->toStateActionTable.setAction( ordering, action ); +} +/* Set to state actions in all states except the start state. */ +void FsmAp::notStartToStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->toStateActionTable.setAction( ordering, action ); + } +} +/* Set to state actions in all states that are not final. */ +void FsmAp::notFinalToStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +/* Set to state actions in states that are not final and not the start state. */ +void FsmAp::middleToStateAction( int ordering, Action *action ) +{ + /* Set the action in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->toStateActionTable.setAction( ordering, action ); + } +} + +/* + * Set From State Actions. + */ +/* Set from state actions in the start state. */ +void FsmAp::startFromStateAction( int ordering, Action *action ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + startState->fromStateActionTable.setAction( ordering, action ); +} +/* Set from state actions in all states. */ +void FsmAp::allFromStateAction( int ordering, Action *action ) +{ + /* Insert the action on all states. 
*/ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->fromStateActionTable.setAction( ordering, action ); +} +/* Set from state actions in final states. */ +void FsmAp::finalFromStateAction( int ordering, Action *action ) +{ + /* Add the action to the from state action table of final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->fromStateActionTable.setAction( ordering, action ); +} +/* Set from state actions in all states except the start state. */ +void FsmAp::notStartFromStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState ) + state->fromStateActionTable.setAction( ordering, action ); + } +} +/* Set from state actions in all states that are not final. */ +void FsmAp::notFinalFromStateAction( int ordering, Action *action ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( ! state->isFinState() ) + state->fromStateActionTable.setAction( ordering, action ); + } +} +/* Set from state actions in states that are not final and not the start state. */ +void FsmAp::middleFromStateAction( int ordering, Action *action ) +{ + /* Set the action in all states that are not the start state and not final. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + if ( state != startState && ! state->isFinState() ) + state->fromStateActionTable.setAction( ordering, action ); + } +} + +/* Shift the function ordering of the start transitions to start + * at fromOrder and increase in units of 1. Useful before staring. + * Returns the maximum number of order numbers used. */ +int FsmAp::shiftStartActionOrder( int fromOrder ) +{ + int maxUsed = 0; + + /* Walk the start state's transitions, shifting function ordering. */ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + /* Walk the function data for the transition and set the keys to + * increasing values starting at fromOrder. Every transition restarts + * the numbering at fromOrder. */ + int curFromOrder = fromOrder; + ActionTable::Iter action = trans->actionTable; + for ( ; action.lte(); action++ ) + action->key = curFromOrder++; + + /* Keep track of the max number of orders used. 
*/ + if ( curFromOrder - fromOrder > maxUsed ) + maxUsed = curFromOrder - fromOrder; + } + + return maxUsed; +} + +/* Remove all priorities. */ +void FsmAp::clearAllPriorities() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Clear out priority data. */ + state->outPriorTable.empty(); + + /* Clear priority data from the out transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) + trans->priorTable.empty(); + } +} + +/* Zeros out the function ordering keys. This may be called before minimization + * when it is known that no more fsm operations are going to be done. This + * will achieve greater reduction as states will not be separated on the basis + * of function ordering. */ +void FsmAp::nullActionKeys( ) +{ + /* For each state... */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the transitions for the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + /* Walk the action table for the transition. */ + for ( ActionTable::Iter action = trans->actionTable; + action.lte(); action++ ) + action->key = 0; + + /* Walk the longest match action table for the transition. */ + for ( LmActionTable::Iter action = trans->lmActionTable; + action.lte(); action++ ) + action->key = 0; + } + + /* Null the action keys of the to state action table. */ + for ( ActionTable::Iter action = state->toStateActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the action keys of the from state action table. */ + for ( ActionTable::Iter action = state->fromStateActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the action keys of the out action table. */ + for ( ActionTable::Iter action = state->outActionTable; + action.lte(); action++ ) + action->key = 0; + + /* Null the action keys of the error action table. 
*/ + for ( ErrActionTable::Iter action = state->errActionTable; + action.lte(); action++ ) + action->ordering = 0; + + /* Null the action keys of the eof action table. */ + for ( ActionTable::Iter action = state->eofActionTable; + action.lte(); action++ ) + action->key = 0; + } +} + +/* Walk the list of states and verify that non final states do not have out + * data, that the SB_BOTH state bits are cleared, and that there are no states + * with zero foreign in transitions. All checks are made with assert. */ +void FsmAp::verifyStates() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Non final states should not have leaving data. */ + if ( ! (state->stateBits & SB_ISFINAL) ) { + assert( state->outActionTable.length() == 0 ); + assert( state->outCondSet.length() == 0 ); + assert( state->outPriorTable.length() == 0 ); + } + + /* Data used in algorithms should be cleared. */ + assert( (state->stateBits & SB_BOTH) == 0 ); + assert( state->foreignInTrans > 0 ); + } +} + +/* Compare two transitions according to their relative priority. Since the + * base transition has no priority associated with it, the default is to + * return equal. */ +int FsmAp::comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ) +{ + /* Looking for differing priorities on same keys. Need to concurrently + * scan the priority lists. */ + PriorTable::Iter pd1 = priorTable1; + PriorTable::Iter pd2 = priorTable2; + while ( pd1.lte() && pd2.lte() ) { + /* Keys differ, advance the side that has the smaller key. */ + if ( pd1->desc->key < pd2->desc->key ) + pd1.increment(); + else if ( pd1->desc->key > pd2->desc->key ) + pd2.increment(); + /* Keys are the same, check priorities. */ + else if ( pd1->desc->priority < pd2->desc->priority ) + return -1; + else if ( pd1->desc->priority > pd2->desc->priority ) + return 1; + else { + /* Keys and priorities are equal, advance both. */ + pd1.increment(); + pd2.increment(); + } + } + + /* No differing priorities on the same key. 
*/ + return 0; +} + +/* Compares two transitions according to priority and functions. Pointers + * should not be null. Does not consider to state or from state. Compare two + * transitions according to the data contained in the transitions. Data means + * any properties added to user transitions that may differentiate them. Since + * the base transition has no data, the default is to return equal. */ +int FsmAp::compareTransData( TransAp *trans1, TransAp *trans2 ) +{ + /* Compare the prior table. */ + int cmpRes = CmpPriorTable::compare( trans1->priorTable, + trans2->priorTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Compare longest match action tables. */ + cmpRes = CmpLmActionTable::compare(trans1->lmActionTable, + trans2->lmActionTable); + if ( cmpRes != 0 ) + return cmpRes; + + /* Compare action tables. */ + return CmpActionTable::compare(trans1->actionTable, + trans2->actionTable); +} + +/* Callback invoked when another trans (or possibly this) is added into this + * transition during the merging process. Draw in any properties of srcTrans + * into this transition. AddInTrans is called when a new transition is made + * that will be a duplicate of another transition or a combination of several + * other transitions. AddInTrans will be called for each transition that the + * new transition is to represent. */ +void FsmAp::addInTrans( TransAp *destTrans, TransAp *srcTrans ) +{ + /* Protect against adding in from ourselves. */ + if ( srcTrans == destTrans ) { + /* Adding in ourselves, need to make a copy of the source action tables. + * The priorities are not copied in as that would have no effect. */ + destTrans->lmActionTable.setActions( LmActionTable(srcTrans->lmActionTable) ); + destTrans->actionTable.setActions( ActionTable(srcTrans->actionTable) ); + } + else { + /* Not a copy of ourself, get the functions and priorities. 
*/ + destTrans->lmActionTable.setActions( srcTrans->lmActionTable ); + destTrans->actionTable.setActions( srcTrans->actionTable ); + destTrans->priorTable.setPriors( srcTrans->priorTable ); + } +} + +/* Compare the properties of states that are embedded by users. Compares out + * priorities, out conditions and the to, from, out, error and eof action tables. */ +int FsmAp::compareStateData( const StateAp *state1, const StateAp *state2 ) +{ + /* Compare the out priority table. */ + int cmpRes = CmpPriorTable:: + compare( state1->outPriorTable, state2->outPriorTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test to state action tables. */ + cmpRes = CmpActionTable::compare( state1->toStateActionTable, + state2->toStateActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test from state action tables. */ + cmpRes = CmpActionTable::compare( state1->fromStateActionTable, + state2->fromStateActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test out action tables. */ + cmpRes = CmpActionTable::compare( state1->outActionTable, + state2->outActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test out condition sets. */ + cmpRes = CmpActionSet::compare( state1->outCondSet, + state2->outCondSet ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test error action tables. */ + cmpRes = CmpErrActionTable::compare( state1->errActionTable, + state2->errActionTable ); + if ( cmpRes != 0 ) + return cmpRes; + + /* Test eof action tables. */ + return CmpActionTable::compare( state1->eofActionTable, + state2->eofActionTable ); +} + + +/* Invoked when a state loses its final state status and the leaving + * transition embedding data should be deleted. */ +void FsmAp::clearOutData( StateAp *state ) +{ + /* Kill the out actions and priorities. 
*/ + state->outActionTable.empty(); + state->outCondSet.empty(); + state->outPriorTable.empty(); +} +/* Report whether the state has any out actions, out conditions or out priorities. */ +bool FsmAp::hasOutData( StateAp *state ) +{ + return ( state->outActionTable.length() > 0 || + state->outCondSet.length() > 0 || + state->outPriorTable.length() > 0 ); +} + +/* + * Setting Conditions. + */ + + +void logNewExpansion( Expansion *exp ); +void logCondSpace( CondSpace *condSpace ); +/* Find the condition space for condSet. If there is none, make one with a fresh base key and insert it in the cond space map. */ +CondSpace *FsmAp::addCondSpace( const CondSet &condSet ) +{ + CondSpace *condSpace = condData->condSpaceMap.find( condSet ); + if ( condSpace == 0 ) { + Key baseKey = condData->nextCondKey; + condData->nextCondKey += (1 << condSet.length() ) * keyOps->alphSize(); + + condSpace = new CondSpace( condSet ); + condSpace->baseKey = baseKey; + condData->condSpaceMap.insert( condSpace ); + + #ifdef LOG_CONDS + cerr << "adding new condition space" << endl; + cerr << " condition set: "; + logCondSpace( condSpace ); + cerr << endl; + cerr << " baseKey: " << baseKey.getVal() << endl; + #endif + } + return condSpace; +} +/* Embed a condition in the start state, isolating the start state first. */ +void FsmAp::startFsmCondition( Action *condAction ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + embedCondition( startState, condAction ); +} +/* Embed a condition in every state of the machine. */ +void FsmAp::allTransCondition( Action *condAction ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + embedCondition( state, condAction ); +} +/* Insert a condition in the out condition set of every final state. */ +void FsmAp::leaveFsmCondition( Action *condAction ) +{ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outCondSet.insert( condAction ); +} diff --git a/contrib/tools/ragel5/ragel/fsmattach.cpp b/contrib/tools/ragel5/ragel/fsmattach.cpp new file mode 100644 index 0000000000..6a90df658a --- /dev/null +++ b/contrib/tools/ragel5/ragel/fsmattach.cpp @@ -0,0 +1,425 @@ +/* + * Copyright 2001 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. 
+ * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <string.h> +#include <assert.h> +#include "fsmgraph.h" + +#include <iostream> +using namespace std; + +/* Insert a transition into an inlist. The head must be supplied. */ +void FsmAp::attachToInList( StateAp *from, StateAp *to, + TransAp *&head, TransAp *trans ) +{ + trans->ilnext = head; + trans->ilprev = 0; + + /* If the in trans list is not empty, set head->ilprev to trans. */ + if ( head != 0 ) + head->ilprev = trans; + + /* Now insert ourselves at the front of the list. */ + head = trans; + + /* Keep track of foreign transitions, self loops (from == to) are not counted. */ + if ( from != to ) { + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * move the state from the misfit list to the main list. */ + if ( to->foreignInTrans == 0 ) + stateList.append( misfitList.detach( to ) ); + } + + to->foreignInTrans += 1; + } +}; + +/* Detach a transition from an inlist. The head of the inlist must be supplied. */ +void FsmAp::detachFromInList( StateAp *from, StateAp *to, + TransAp *&head, TransAp *trans ) +{ + /* Detach in the inTransList. 
*/ + if ( trans->ilprev == 0 ) + head = trans->ilnext; + else + trans->ilprev->ilnext = trans->ilnext; + + if ( trans->ilnext != 0 ) + trans->ilnext->ilprev = trans->ilprev; + + /* Keep track of foreign transitions, self loops (from == to) are not counted. */ + if ( from != to ) { + to->foreignInTrans -= 1; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions goes down to 0 then move the + * state from the main list to the misfit list. */ + if ( to->foreignInTrans == 0 ) + misfitList.append( stateList.detach( to ) ); + } + } +} + +/* Attach states on the default transition, range list or on out/in list key. + * First makes a new transition and appends it to the out list of from. The + * new transition joins the in list of to only when to is not null. */ +TransAp *FsmAp::attachNewTrans( StateAp *from, StateAp *to, Key lowKey, Key highKey ) +{ + /* Make the new transition. */ + TransAp *retVal = new TransAp(); + + /* The transition is now attached. Remember the parties involved. */ + retVal->fromState = from; + retVal->toState = to; + + /* Make the entry in the out list for the transition. */ + from->outList.append( retVal ); + + /* Set the keys of the new trans. */ + retVal->lowKey = lowKey; + retVal->highKey = highKey; + + /* Attach using inList as the head pointer. */ + if ( to != 0 ) + attachToInList( from, to, to->inList.head, retVal ); + + return retVal; +} + +/* Attach for range lists or for the default transition. This attach should + * be used when a transition already is allocated and must be attached to a + * target state. Does not handle adding the transition into the out list. + * The transition must come in with both fromState and toState null. */ +void FsmAp::attachTrans( StateAp *from, StateAp *to, TransAp *trans ) +{ + assert( trans->fromState == 0 && trans->toState == 0 ); + trans->fromState = from; + trans->toState = to; + + if ( to != 0 ) { + /* Attach using the inList pointer as the head pointer. */ + attachToInList( from, to, to->inList.head, trans ); + } +} + +/* Redirect a transition away from error and towards some state. 
This is just + * like attachTrans except it requires fromState to be set and does not touch + * it. */ +void FsmAp::redirectErrorTrans( StateAp *from, StateAp *to, TransAp *trans ) +{ + assert( trans->fromState != 0 && trans->toState == 0 ); + trans->toState = to; + + if ( to != 0 ) { + /* Attach using the inList pointer as the head pointer. */ + attachToInList( from, to, to->inList.head, trans ); + } +} + +/* Detach for out/in lists or for default transition. Resets fromState and toState of trans to null. */ +void FsmAp::detachTrans( StateAp *from, StateAp *to, TransAp *trans ) +{ + assert( trans->fromState == from && trans->toState == to ); + trans->fromState = 0; + trans->toState = 0; + + if ( to != 0 ) { + /* Detach using to's inList pointer as the head. */ + detachFromInList( from, to, to->inList.head, trans ); + } +} + + +/* Detach a state from the graph. Detaches and deletes transitions in and out + * of the state. Empties inList and outList. Removes the state from the final + * state set. A detached state becomes useless and should be deleted. */ +void FsmAp::detachState( StateAp *state ) +{ + /* Detach the in transitions from the inList list of transitions. */ + while ( state->inList.head != 0 ) { + /* Get pointers to the trans and the state. */ + TransAp *trans = state->inList.head; + StateAp *fromState = trans->fromState; + + /* Detach the transitions from the source state. */ + detachTrans( fromState, state, trans ); + + /* Take the transition out of the source state's out list and delete it. */ + fromState->outList.detach( trans ); + delete trans; + } + + /* Remove the entry points into the machine that lead to this state. */ + while ( state->entryIds.length() > 0 ) + unsetEntry( state->entryIds[0], state ); + + /* Detach and delete the out transitions, fetching next before each delete. */ + for ( TransList::Iter trans = state->outList; trans.lte(); ) { + TransList::Iter next = trans.next(); + detachTrans( state, trans->toState, trans ); + delete trans; + trans = next; + } + + /* Delete all of the out range pointers. 
*/ + state->outList.abandon(); + + /* Remove the state from the final state set if it is final. */ + if ( state->stateBits & SB_ISFINAL ) + finStateSet.remove( state ); +} + + +/* Duplicate a transition. Makes a new transition that is attached to the same + * dest as srcTrans. The new transition has functions and priority taken from + * srcTrans. Used for merging a transition into a free spot. The trans can + * just be dropped in. It does not conflict with an existing trans and need + * not be crossed. Returns the new transition, which this function does not + * place in any out list. */ +TransAp *FsmAp::dupTrans( StateAp *from, TransAp *srcTrans ) +{ + /* Make a new transition. */ + TransAp *newTrans = new TransAp(); + + /* We can attach the transition, one does not exist. */ + attachTrans( from, srcTrans->toState, newTrans ); + + /* Call the user callback to add in the original source transition. */ + addInTrans( newTrans, srcTrans ); + + return newTrans; +} + +/* In crossing, src trans and dest trans both go to existing states. Make one + * state from the sets of states that src and dest trans go to. */ +TransAp *FsmAp::fsmAttachStates( MergeData &md, StateAp *from, + TransAp *destTrans, TransAp *srcTrans ) +{ + /* The priorities are equal. We must merge the transitions. Does the + * existing trans go to the state we are to attach to? i.e., are we to + * simply double up the transition? */ + StateAp *toState = srcTrans->toState; + StateAp *existingState = destTrans->toState; + + if ( existingState == toState ) { + /* The transition is a double up to the same state. Copy the src + * trans into the dest trans. We don't need to merge in the from out + * trans data, that was done already. */ + addInTrans( destTrans, srcTrans ); + } + else { + /* The trans is not a double up. Dest trans cannot be the same as src + * trans. Set up the state set. */ + StateSet stateSet; + + /* We go to all the states the existing trans goes to, plus... 
*/ + if ( existingState->stateDictEl == 0 ) + stateSet.insert( existingState ); + else + stateSet.insert( existingState->stateDictEl->stateSet ); + + /* ... all the states that we have been told to go to. */ + if ( toState->stateDictEl == 0 ) + stateSet.insert( toState ); + else + stateSet.insert( toState->stateDictEl->stateSet ); + + /* Look for the state. If it is not there already, make it. */ + StateDictEl *lastFound; + if ( md.stateDict.insert( stateSet, &lastFound ) ) { + /* Make a new state representing the combination of states in + * stateSet. It gets added to the fill list. This means that we + * need to fill in its transitions sometime in the future. We + * don't do that now (i.e., do not recurse). */ + StateAp *combinState = addState(); + + /* Link up the dict element and the state. */ + lastFound->targState = combinState; + combinState->stateDictEl = lastFound; + + /* Add to the fill list. */ + md.fillListAppend( combinState ); + } + + /* Get the target state of the dict element, newly made or already there. */ + StateAp *targ = lastFound->targState; + + /* Detach the dest trans from the existing state. */ + detachTrans( from, existingState, destTrans ); + + /* Re-attach to the new target. */ + attachTrans( from, targ, destTrans ); + + /* Add in src trans to the existing transition that we redirected to + * the new state. We don't need to merge in the from out trans data, + * that was done already. */ + addInTrans( destTrans, srcTrans ); + } + + return destTrans; +} + +/* Two transitions are to be crossed, handle the possibility of either going + * to the error state. */ +TransAp *FsmAp::mergeTrans( MergeData &md, StateAp *from, + TransAp *destTrans, TransAp *srcTrans ) +{ + TransAp *retTrans = 0; + if ( destTrans->toState == 0 && srcTrans->toState == 0 ) { + /* Error added into error. 
*/ + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else if ( destTrans->toState == 0 && srcTrans->toState != 0 ) { + /* Non error added into error, we need to detach and reattach. */ + detachTrans( from, destTrans->toState, destTrans ); + attachTrans( from, srcTrans->toState, destTrans ); + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else if ( srcTrans->toState == 0 ) { + /* Dest goes somewhere but src doesn't, just add it in. */ + addInTrans( destTrans, srcTrans ); + retTrans = destTrans; + } + else { + /* Both go somewhere, run the actual cross. */ + retTrans = fsmAttachStates( md, from, destTrans, srcTrans ); + } + + return retTrans; +} + +/* Find the trans with the higher priority. If src is lower priority than dest then + * src is ignored. If src is higher priority than dest, then src overwrites dest. If + * the priorities are equal, then they are merged. */ +TransAp *FsmAp::crossTransitions( MergeData &md, StateAp *from, + TransAp *destTrans, TransAp *srcTrans ) +{ + TransAp *retTrans; + + /* Compare the priority of the dest and src transitions. */ + int compareRes = comparePrior( destTrans->priorTable, srcTrans->priorTable ); + if ( compareRes < 0 ) { + /* Src trans has a higher priority than dest, src overwrites dest. + * Detach dest and return a copy of src. The dest trans is detached + * but it is not deleted in this function. */ + detachTrans( from, destTrans->toState, destTrans ); + retTrans = dupTrans( from, srcTrans ); + } + else if ( compareRes > 0 ) { + /* The dest trans has a higher priority, use dest. */ + retTrans = destTrans; + } + else { + /* Src trans and dest trans have the same priority, they must be merged. */ + retTrans = mergeTrans( md, from, destTrans, srcTrans ); + } + + /* Return the transition that resulted from the cross. */ + return retTrans; +} + +/* Copy the transitions in srcList to the outlist of dest. The srcList should + * not be the outList of dest, otherwise you would be copying the contents of + * srcList into itself as it's iterated: bad news. 
*/ +void FsmAp::outTransCopy( MergeData &md, StateAp *dest, TransAp *srcList ) +{ + /* The destination list, built up here and transferred in at the end. */ + TransList destList; + + /* Set up an iterator to stop at breaks. */ + PairIter<TransAp> outPair( dest->outList.head, srcList ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + case RangeInS1: { + /* The pair iter is the authority on the keys. It may have needed + * to break the dest range. */ + TransAp *destTrans = outPair.s1Tel.trans; + destTrans->lowKey = outPair.s1Tel.lowKey; + destTrans->highKey = outPair.s1Tel.highKey; + destList.append( destTrans ); + break; + } + case RangeInS2: { + /* Range only in src, duplicate the src trans onto dest. */ + TransAp *newTrans = dupTrans( dest, outPair.s2Tel.trans ); + + /* Set up the transition's keys and append to the dest list. */ + newTrans->lowKey = outPair.s2Tel.lowKey; + newTrans->highKey = outPair.s2Tel.highKey; + destList.append( newTrans ); + break; + } + case RangeOverlap: { + /* Exact overlap, cross them. */ + TransAp *newTrans = crossTransitions( md, dest, + outPair.s1Tel.trans, outPair.s2Tel.trans ); + + /* Set up the transition's keys and append to the dest list. */ + newTrans->lowKey = outPair.s1Tel.lowKey; + newTrans->highKey = outPair.s1Tel.highKey; + destList.append( newTrans ); + break; + } + case BreakS1: { + /* Since we are always writing to the dest trans, the dest needs + * to be copied when it is broken. The copy goes into the first + * half of the break to "break it off". */ + outPair.s1Tel.trans = dupTrans( dest, outPair.s1Tel.trans ); + break; + } + case BreakS2: /* No action is taken when the src range is broken. */ + break; + } + } + + /* Abandon the old outList and transfer destList into it. */ + dest->outList.transfer( destList ); +} + + +/* Move all the transitions that go into src so that they go into dest. */ +void FsmAp::inTransMove( StateAp *dest, StateAp *src ) +{ + /* Do not try to move in trans to and from the same state. 
*/ + assert( dest != src ); + + /* If src is the start state, dest becomes the start state. */ + if ( src == startState ) { + unsetStartState(); + setStartState( dest ); + } + + /* For each entry point into, create an entry point into dest, when the + * state is detached, the entry points to src will be removed. */ + for ( EntryIdSet::Iter enId = src->entryIds; enId.lte(); enId++ ) + changeEntry( *enId, dest, src ); + + /* Move the transitions in inList. */ + while ( src->inList.head != 0 ) { + /* Get trans and from state. */ + TransAp *trans = src->inList.head; + StateAp *fromState = trans->fromState; + + /* Detach from src, reattach to dest. */ + detachTrans( fromState, src, trans ); + attachTrans( fromState, dest, trans ); + } +} diff --git a/contrib/tools/ragel5/ragel/fsmbase.cpp b/contrib/tools/ragel5/ragel/fsmbase.cpp new file mode 100644 index 0000000000..f1d7141c09 --- /dev/null +++ b/contrib/tools/ragel5/ragel/fsmbase.cpp @@ -0,0 +1,598 @@ +/* + * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <string.h> +#include <assert.h> +#include "fsmgraph.h" + +/* Simple singly linked list append routine for the fill list. The new state + * goes to the end of the list. 
*/ +void MergeData::fillListAppend( StateAp *state ) +{ + state->alg.next = 0; + + if ( stfillHead == 0 ) { + /* List is empty, state becomes head and tail. */ + stfillHead = state; + stfillTail = state; + } + else { + /* List is not empty, state goes after last element. */ + stfillTail->alg.next = state; + stfillTail = state; + } +} + +/* Graph constructor. Makes an empty graph: no start state, no error state, misfit accounting off. */ +FsmAp::FsmAp() +: + /* No start state. */ + startState(0), + errState(0), + + /* Misfit accounting is a switch, turned on only at specific times. It + * controls what happens when states have no way in from the outside + * world. */ + misfitAccounting(false) +{ +} + +/* Copy all graph data including transitions. The error state is not carried over: errState starts out as 0. */ +FsmAp::FsmAp( const FsmAp &graph ) +: + /* Lists start empty. Will be filled by copy. */ + stateList(), + misfitList(), + + /* Copy in the entry points and the start state, + * pointers will be resolved later. */ + entryPoints(graph.entryPoints), + startState(graph.startState), + errState(0), + + /* Will be filled by copy. */ + finStateSet(), + + /* Misfit accounting is only on during merging. */ + misfitAccounting(false) +{ + /* Create the states and record their map in the original state. */ + StateList::Iter origState = graph.stateList; + for ( ; origState.lte(); origState++ ) { + /* Make the new state. */ + StateAp *newState = new StateAp( *origState ); + + /* Add the state to the list. */ + stateList.append( newState ); + + /* Record the duplicate in the stateMap of the old state. */ + origState->alg.stateMap = newState; + } + + /* Dereference all the state maps. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + /* The transition's toState still points to the original in the src machine. The + * target's duplicate is in the state map. A null toState stays null. */ + StateAp *toState = trans->toState != 0 ? trans->toState->alg.stateMap : 0; + + /* Attach The transition to the duplicate. 
*/ + trans->toState = 0; + attachTrans( state, toState, trans ); + } + } + + /* Fix the state pointers in the entry points array. */ + EntryMapEl *eel = entryPoints.data; + for ( int e = 0; e < entryPoints.length(); e++, eel++ ) { + /* Get the duplicate of the state. */ + eel->value = eel->value->alg.stateMap; + + /* Foreign in transitions must be built up when duping machines so + * increment it here. */ + eel->value->foreignInTrans += 1; + } + + /* Fix the start state pointer and the new start state's count of in + * transitions. NOTE(review): startState is dereferenced unconditionally, so the graph being copied must have a start state. */ + startState = startState->alg.stateMap; + startState->foreignInTrans += 1; + + /* Build the final state set from the duplicates of the original final states. */ + StateSet::Iter st = graph.finStateSet; + for ( ; st.lte(); st++ ) + finStateSet.insert((*st)->alg.stateMap); +} + +/* Deletes all transition data then deletes each state. */ +FsmAp::~FsmAp() +{ + /* Delete all the transitions. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Iterate the out transitions, deleting them. */ + state->outList.empty(); + } + + /* Delete all the states. */ + stateList.empty(); +} + +/* Set a state final. The state has its SB_ISFINAL bit set and the state + * is added to the finStateSet. Has no effect if the state is already final. */ +void FsmAp::setFinState( StateAp *state ) +{ + /* Is it already a fin state? */ + if ( state->stateBits & SB_ISFINAL ) + return; + + state->stateBits |= SB_ISFINAL; + finStateSet.insert( state ); +} + +/* Set a state non-final. The state has its SB_ISFINAL bit cleared and the state + * is removed from the final state set. Has no effect if the state is not final. */ +void FsmAp::unsetFinState( StateAp *state ) +{ + /* Is it already a non-final state? */ + if ( ! (state->stateBits & SB_ISFINAL) ) + return; + + /* When a state loses its final state status it must relinquish all the + * properties that are allowed only for final states. */ + clearOutData( state ); + + state->stateBits &= ~ SB_ISFINAL; + finStateSet.remove( state ); +} + +/* Set and unset a state as the start state. 
*/ +void FsmAp::setStartState( StateAp *state ) +{ + /* Should change from unset to set. */ + assert( startState == 0 ); + startState = state; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the main state list. */ + if ( state->foreignInTrans == 0 ) + stateList.append( misfitList.detach( state ) ); + } + + /* Up the foreign in transitions to the state. */ + state->foreignInTrans += 1; +} +/* Clear the start state. Its count of foreign in transitions is decremented and, under misfit accounting, it moves to the misfit list when the count reaches 0. */ +void FsmAp::unsetStartState() +{ + /* Should change from set to unset. */ + assert( startState != 0 ); + + /* Decrement the start state's count of foreign in transitions. */ + startState->foreignInTrans -= 1; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( startState->foreignInTrans == 0 ) + misfitList.append( stateList.detach( startState ) ); + } + + startState = 0; +} + +/* Associate an id with a state. Makes the state a named entry point. Has no + * effect if the entry point is already mapped to the state. */ +void FsmAp::setEntry( int id, StateAp *state ) +{ + /* Insert the id into the state. If the state is already labelled with id, + * nothing to do. */ + if ( state->entryIds.insert( id ) ) { + /* Insert the entry into the entry point map. Ids may repeat, hence insertMulti. */ + entryPoints.insertMulti( id, state ); + + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the main state list. */ + if ( state->foreignInTrans == 0 ) + stateList.append( misfitList.detach( state ) ); + } + + /* Up the foreign in transitions to the state. */ + state->foreignInTrans += 1; + } +} + +/* Remove the association of an id with a state. The state loses its entry + * point status. Assumes that the id is indeed mapped to state. */ +void FsmAp::unsetEntry( int id, StateAp *state ) +{ + /* Find the entry point in on id. 
*/ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + while ( enLow->value != state ) + enLow += 1; + + /* Remove the record from the map. */ + entryPoints.remove( enLow ); + + /* Remove the state's sense of the link. */ + state->entryIds.remove( id ); + state->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( state->foreignInTrans == 0 ) + misfitList.append( stateList.detach( state ) ); + } +} + +/* Remove all associations of an id with states. Assumes that the id is indeed + * mapped to a state. */ +void FsmAp::unsetEntry( int id ) +{ + /* Find the range of entry points on id. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + for ( EntryMapEl *mel = enLow; mel <= enHigh; mel++ ) { + /* Remove the state's sense of the link. */ + mel->value->entryIds.remove( id ); + mel->value->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 + * then take it off the main list and put it on the misfit list. */ + if ( mel->value->foreignInTrans == 0 ) + misfitList.append( stateList.detach( mel->value ) ); + } + } + + /* Remove the records from the entry points map. */ + entryPoints.removeMulti( enLow, enHigh ); +} + +/* Change the entry point on id that goes into from so that it goes into to. Assumes that id is indeed mapped to from. */ +void FsmAp::changeEntry( int id, StateAp *to, StateAp *from ) +{ + /* Find the entry for id that goes into from. The scan is not bounded by enHigh, so id must be mapped to from. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + while ( enLow->value != from ) + enLow += 1; + + /* Change it to the new target. */ + enLow->value = to; + + /* Remove from's sense of the link. */ + from->entryIds.remove( id ); + from->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. 
*/ + if ( from->foreignInTrans == 0 ) + misfitList.append( stateList.detach( from ) ); + } + + /* Add to's sense of the link. If to already has the id then its count of foreign in transitions is left alone. */ + if ( to->entryIds.insert( id ) != 0 ) { + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the main state list. */ + if ( to->foreignInTrans == 0 ) + stateList.append( misfitList.detach( to ) ); + } + + /* Up the foreign in transitions to the state. */ + to->foreignInTrans += 1; + } +} + + +/* Clear all entry points from a machine. */ +void FsmAp::unsetAllEntryPoints() +{ + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) { + /* Kill all the state's entry points at once. A state seen again through another of its ids is skipped because its id set is already empty. */ + if ( en->value->entryIds.length() > 0 ) { + en->value->foreignInTrans -= en->value->entryIds.length(); + + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 + * then take it off the main list and put it on the misfit + * list. */ + if ( en->value->foreignInTrans == 0 ) + misfitList.append( stateList.detach( en->value ) ); + } + + /* Clear the set of ids out all at once. */ + en->value->entryIds.empty(); + } + } + + /* Now clear out the entry map all at once. */ + entryPoints.empty(); +} + +/* Append an epsilon transition on id to every final state. */ +void FsmAp::epsilonTrans( int id ) +{ + for ( StateSet::Iter fs = finStateSet; fs.lte(); fs++ ) + (*fs)->epsilonTrans.append( id ); +} + +/* Mark all states reachable from state. Traverses transitions forward. Used + * for removing states that have no path into them. */ +void FsmAp::markReachableFromHere( StateAp *state ) +{ + /* Base case: the state was already visited. */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all out transitions. 
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + markReachableFromHere( trans->toState ); + } +} +/* Mark states reachable from state by forward traversal, without recursing into final states. */ +void FsmAp::markReachableFromHereStopFinal( StateAp *state ) +{ + /* Base case: the state was already visited. */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all out transitions, but do not enter final states. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + StateAp *toState = trans->toState; + if ( toState != 0 && !toState->isFinState() ) + markReachableFromHereStopFinal( toState ); + } +} + +/* Mark all states reachable from state. Traverse transitions backwards. Used + * for removing dead end paths in graphs. */ +void FsmAp::markReachableFromHereReverse( StateAp *state ) +{ + /* Base case: the state was already visited. */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states with + * transitions into this state. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all in transitions. */ + for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) + markReachableFromHereReverse( trans->fromState ); +} + +/* Determine if there are any entry points into a start state other than the + * start state. Setting starting transitions requires that the start state be + * isolated. In most cases a start state will already be isolated. */ +bool FsmAp::isStartStateIsolated() +{ + /* If there are any in transitions then the state is not isolated. */ + if ( startState->inList.head != 0 ) + return false; + + /* If there are any entry points then the state is not isolated. */ + if ( startState->entryIds.length() > 0 ) + return false; + + return true; +} + +/* Bring in other's entry points. Assumes others states are going to be + * copied into this machine. 
*/ +void FsmAp::copyInEntryPoints( FsmAp *other ) +{ + /* Use insert multi because names are not unique. */ + for ( EntryMap::Iter en = other->entryPoints; en.lte(); en++ ) + entryPoints.insertMulti( en->key, en->value ); +} + + +void FsmAp::unsetAllFinStates() +{ + for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) + (*st)->stateBits &= ~ SB_ISFINAL; + finStateSet.empty(); +} + +void FsmAp::setFinBits( int finStateBits ) +{ + for ( int s = 0; s < finStateSet.length(); s++ ) + finStateSet.data[s]->stateBits |= finStateBits; +} + + +/* Tests the integrity of the transition lists and the fromStates. */ +void FsmAp::verifyIntegrity() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out transitions and assert fromState is correct. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) + assert( trans->fromState == state ); + + /* Walk the inlist and assert toState is correct. */ + for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) + assert( trans->toState == state ); + } +} + +void FsmAp::verifyReachability() +{ + /* Mark all the states that can be reached + * through the set of entry points. */ + markReachableFromHere( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + markReachableFromHere( en->value ); + + /* Check that everything got marked. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Assert it got marked and then clear the mark. */ + assert( st->stateBits & SB_ISMARKED ); + st->stateBits &= ~ SB_ISMARKED; + } +} + +void FsmAp::verifyNoDeadEndStates() +{ + /* Mark all states that have paths to the final states. */ + for ( StateSet::Iter pst = finStateSet; pst.lte(); pst++ ) + markReachableFromHereReverse( *pst ); + + /* Start state gets honorary marking. Must be done AFTER recursive call. */ + startState->stateBits |= SB_ISMARKED; + + /* Make sure everything got marked. 
*/ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Assert the state got marked and unmark it. */ + assert( st->stateBits & SB_ISMARKED ); + st->stateBits &= ~ SB_ISMARKED; + } +} + +void FsmAp::depthFirstOrdering( StateAp *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->stateBits & SB_ONLIST ) + return; + + /* Doing depth first, put state on the list. */ + state->stateBits |= SB_ONLIST; + stateList.append( state ); + + /* Recurse on everything ranges. */ + for ( TransList::Iter tel = state->outList; tel.lte(); tel++ ) { + if ( tel->toState != 0 ) + depthFirstOrdering( tel->toState ); + } +} + +/* Ordering states by transition connections. */ +void FsmAp::depthFirstOrdering() +{ + /* Init on state list flags. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->stateBits &= ~SB_ONLIST; + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + /* Add back to the state list from the start state and all other entry + * points. */ + if ( errState != 0 ) + depthFirstOrdering( errState ); + depthFirstOrdering( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + depthFirstOrdering( en->value ); + + /* Make sure we put everything back on. */ + assert( stateListLen == stateList.length() ); +} + +/* Stable sort the states by final state status. */ +void FsmAp::sortStatesByFinal() +{ + /* Move forward through the list and throw final states onto the end. */ + StateAp *state = 0; + StateAp *next = stateList.head; + StateAp *last = stateList.tail; + while ( state != last ) { + /* Move forward and load up the next. */ + state = next; + next = state->next; + + /* Throw to the end? 
*/ + if ( state->isFinState() ) { + stateList.detach( state ); + stateList.append( state ); + } + } +} + +void FsmAp::setStateNumbers( int base ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->alg.stateNum = base++; +} + + +bool FsmAp::checkErrTrans( StateAp *state, TransAp *trans ) +{ + /* Might go directly to error state. */ + if ( trans->toState == 0 ) + return true; + + if ( trans->prev == 0 ) { + /* If this is the first transition. */ + if ( keyOps->minKey < trans->lowKey ) + return true; + } + else { + /* Not the first transition. Compare against the prev. */ + TransAp *prev = trans->prev; + Key nextKey = prev->highKey; + nextKey.increment(); + if ( nextKey < trans->lowKey ) + return true; + } + return false; +} + +bool FsmAp::checkErrTransFinish( StateAp *state ) +{ + /* Check if there are any ranges already. */ + if ( state->outList.length() == 0 ) + return true; + else { + /* Get the last and check for a gap on the end. */ + TransAp *last = state->outList.tail; + if ( last->highKey < keyOps->maxKey ) + return true; + } + return 0; +} + +bool FsmAp::hasErrorTrans() +{ + bool result; + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) { + result = checkErrTrans( st, tr ); + if ( result ) + return true; + } + result = checkErrTransFinish( st ); + if ( result ) + return true; + } + return false; +} diff --git a/contrib/tools/ragel5/ragel/fsmgraph.cpp b/contrib/tools/ragel5/ragel/fsmgraph.cpp new file mode 100644 index 0000000000..d7d0ba4fe2 --- /dev/null +++ b/contrib/tools/ragel5/ragel/fsmgraph.cpp @@ -0,0 +1,1426 @@ +/* + * Copyright 2001, 2002, 2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. 
+ * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <assert.h> +#include <iostream> + +#include "fsmgraph.h" +#include "mergesort.h" +#include "parsedata.h" + +using std::cerr; +using std::endl; + +/* Make a new state. The new state will be put on the graph's + * list of state. The new state can be created final or non final. */ +StateAp *FsmAp::addState() +{ + /* Make the new state to return. */ + StateAp *state = new StateAp(); + + if ( misfitAccounting ) { + /* Create the new state on the misfit list. All states are created + * with no foreign in transitions. */ + misfitList.append( state ); + } + else { + /* Create the new state. */ + stateList.append( state ); + } + + return state; +} + +/* Construct an FSM that is the concatenation of an array of characters. A new + * machine will be made that has len+1 states with one transition between each + * state for each integer in str. IsSigned determines if the integers are to + * be considered as signed or unsigned ints. */ +void FsmAp::concatFsm( Key *str, int len ) +{ + /* Make the first state and set it as the start state. */ + StateAp *last = addState(); + setStartState( last ); + + /* Attach subsequent states. 
*/ + for ( int i = 0; i < len; i++ ) { + StateAp *newState = addState(); + attachNewTrans( last, newState, str[i], str[i] ); + last = newState; + } + + /* Make the last state the final state. */ + setFinState( last ); +} + +/* Case insensitive version of concatFsm. */ +void FsmAp::concatFsmCI( Key *str, int len ) +{ + /* Make the first state and set it as the start state. */ + StateAp *last = addState(); + setStartState( last ); + + /* Attach subsequent states. */ + for ( int i = 0; i < len; i++ ) { + StateAp *newState = addState(); + + KeySet keySet; + if ( str[i].isLower() ) + keySet.insert( str[i].toUpper() ); + if ( str[i].isUpper() ) + keySet.insert( str[i].toLower() ); + keySet.insert( str[i] ); + + for ( int i = 0; i < keySet.length(); i++ ) + attachNewTrans( last, newState, keySet[i], keySet[i] ); + + last = newState; + } + + /* Make the last state the final state. */ + setFinState( last ); +} + +/* Construct a machine that matches one character. A new machine will be made + * that has two states with a single transition between the states. IsSigned + * determines if the integers are to be considered as signed or unsigned ints. */ +void FsmAp::concatFsm( Key chr ) +{ + /* Two states first start, second final. */ + setStartState( addState() ); + + StateAp *end = addState(); + setFinState( end ); + + /* Attach on the character. */ + attachNewTrans( startState, end, chr, chr ); +} + +/* Construct a machine that matches any character in set. A new machine will + * be made that has two states and len transitions between the them. The set + * should be ordered correctly accroding to KeyOps and should not contain + * any duplicates. */ +void FsmAp::orFsm( Key *set, int len ) +{ + /* Two states first start, second final. */ + setStartState( addState() ); + + StateAp *end = addState(); + setFinState( end ); + + for ( int i = 1; i < len; i++ ) + assert( set[i-1] < set[i] ); + + /* Attach on all the integers in the given string of ints. 
*/ + for ( int i = 0; i < len; i++ ) + attachNewTrans( startState, end, set[i], set[i] ); +} + +/* Construct a machine that matches a range of characters. A new machine will + * be made with two states and a range transition between them. The range will + * match any characters from low to high inclusive. Low should be less than or + * equal to high otherwise undefined behaviour results. IsSigned determines + * if the integers are to be considered as signed or unsigned ints. */ +void FsmAp::rangeFsm( Key low, Key high ) +{ + /* Two states first start, second final. */ + setStartState( addState() ); + + StateAp *end = addState(); + setFinState( end ); + + /* Attach using the range of characters. */ + attachNewTrans( startState, end, low, high ); +} + +/* Construct a machine that a repeated range of characters. */ +void FsmAp::rangeStarFsm( Key low, Key high) +{ + /* One state which is final and is the start state. */ + setStartState( addState() ); + setFinState( startState ); + + /* Attach start to start using range of characters. */ + attachNewTrans( startState, startState, low, high ); +} + +/* Construct a machine that matches the empty string. A new machine will be + * made with only one state. The new state will be both a start and final + * state. IsSigned determines if the machine has a signed or unsigned + * alphabet. Fsm operations must be done on machines with the same alphabet + * signedness. */ +void FsmAp::lambdaFsm( ) +{ + /* Give it one state with no transitions making it + * the start state and final state. */ + setStartState( addState() ); + setFinState( startState ); +} + +/* Construct a machine that matches nothing at all. A new machine will be + * made with only one state. It will not be final. */ +void FsmAp::emptyFsm( ) +{ + /* Give it one state with no transitions making it + * the start state and final state. 
*/ + setStartState( addState() ); +} + +void FsmAp::transferOutData( StateAp *destState, StateAp *srcState ) +{ + for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) { + /* Get the actions data from the outActionTable. */ + trans->actionTable.setActions( srcState->outActionTable ); + + /* Get the priorities from the outPriorTable. */ + trans->priorTable.setPriors( srcState->outPriorTable ); + } + } +} + +/* Kleene star operator. Makes this machine the kleene star of itself. Any + * transitions made going out of the machine and back into itself will be + * notified that they are leaving transitions by having the leavingFromState + * callback invoked. */ +void FsmAp::starOp( ) +{ + /* For the merging process. */ + MergeData md; + + /* Turn on misfit accounting to possibly catch the old start state. */ + setMisfitAccounting( true ); + + /* Create the new new start state. It will be set final after the merging + * of the final states with the start state is complete. */ + StateAp *prevStartState = startState; + unsetStartState(); + setStartState( addState() ); + + /* Merge the new start state with the old one to isolate it. */ + mergeStates( md, startState, prevStartState ); + + /* Merge the start state into all final states. Except the start state on + * the first pass. If the start state is set final we will be doubling up + * its transitions, which will get transfered to any final states that + * follow it in the final state set. This will be determined by the order + * of items in the final state set. To prevent this we just merge with the + * start on a second pass. */ + for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) { + if ( *st != startState ) + mergeStatesLeaving( md, *st, startState ); + } + + /* Now it is safe to merge the start state with itself (provided it + * is set final). 
*/ + if ( startState->isFinState() ) + mergeStatesLeaving( md, startState, startState ); + + /* Now ensure the new start state is a final state. */ + setFinState( startState ); + + /* Fill in any states that were newed up as combinations of others. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +void FsmAp::repeatOp( int times ) +{ + /* Must be 1 and up. 0 produces null machine and requires deleting this. */ + assert( times > 0 ); + + /* A repeat of one does absolutely nothing. */ + if ( times == 1 ) + return; + + /* Make a machine to make copies from. */ + FsmAp *copyFrom = new FsmAp( *this ); + + /* Concatentate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + FsmAp *dup = new FsmAp( *copyFrom ); + doConcat( dup, 0, false ); + } + + /* Now use the copyFrom on the end. */ + doConcat( copyFrom, 0, false ); +} + +void FsmAp::optionalRepeatOp( int times ) +{ + /* Must be 1 and up. 0 produces null machine and requires deleting this. */ + assert( times > 0 ); + + /* A repeat of one optional merely allows zero string. */ + if ( times == 1 ) { + setFinState( startState ); + return; + } + + /* Make a machine to make copies from. */ + FsmAp *copyFrom = new FsmAp( *this ); + + /* The state set used in the from end of the concatentation. Starts with + * the initial final state set, then after each concatenation, gets set to + * the the final states that come from the the duplicate. */ + StateSet lastFinSet( finStateSet ); + + /* Set the initial state to zero to allow zero copies. */ + setFinState( startState ); + + /* Concatentate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + /* Make a duplicate for concating and set the fin bits to graph 2 so we + * can pick out it's final states after the optional style concat. 
*/ + FsmAp *dup = new FsmAp( *copyFrom ); + dup->setFinBits( SB_GRAPH2 ); + doConcat( dup, &lastFinSet, true ); + + /* Clear the last final state set and make the new one by taking only + * the final states that come from graph 2.*/ + lastFinSet.empty(); + for ( int i = 0; i < finStateSet.length(); i++ ) { + /* If the state came from graph 2, add it to the last set and clear + * the bits. */ + StateAp *fs = finStateSet[i]; + if ( fs->stateBits & SB_GRAPH2 ) { + lastFinSet.insert( fs ); + fs->stateBits &= ~SB_GRAPH2; + } + } + } + + /* Now use the copyFrom on the end, no bits set, no bits to clear. */ + doConcat( copyFrom, &lastFinSet, true ); +} + + +/* Fsm concatentation worker. Supports treating the concatentation as optional, + * which essentially leaves the final states of machine one as final. */ +void FsmAp::doConcat( FsmAp *other, StateSet *fromStates, bool optional ) +{ + /* For the merging process. */ + StateSet finStateSetCopy, startStateSet; + MergeData md; + + /* Turn on misfit accounting for both graphs. */ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Get the other's start state. */ + StateAp *otherStartState = other->startState; + + /* Unset other's start state before bringing in the entry points. */ + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Bring in other's states into our state lists. */ + stateList.append( other->stateList ); + misfitList.append( other->misfitList ); + + /* If from states is not set, then get a copy of our final state set before + * we clobber it and use it instead. */ + if ( fromStates == 0 ) { + finStateSetCopy = finStateSet; + fromStates = &finStateSetCopy; + } + + /* Unset all of our final states and get the final states from other. 
*/ + if ( !optional ) + unsetAllFinStates(); + finStateSet.insert( other->finStateSet ); + + /* Since other's lists are empty, we can delete the fsm without + * affecting any states. */ + delete other; + + /* Merge our former final states with the start state of other. */ + for ( int i = 0; i < fromStates->length(); i++ ) { + StateAp *state = fromStates->data[i]; + + /* Merge the former final state with other's start state. */ + mergeStatesLeaving( md, state, otherStartState ); + + /* If the former final state was not reset final then we must clear + * the state's out trans data. If it got reset final then it gets to + * keep its out trans data. This must be done before fillInStates gets + * called to prevent the data from being sourced. */ + if ( ! state->isFinState() ) + clearOutData( state ); + } + + /* Fill in any new states made from merging. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Concatenates other to the end of this machine. Other is deleted. Any + * transitions made leaving this machine and entering into other are notified + * that they are leaving transitions by having the leavingFromState callback + * invoked. */ +void FsmAp::concatOp( FsmAp *other ) +{ + /* Assert same signedness and return graph concatenation op. */ + doConcat( other, 0, false ); +} + + +void FsmAp::doOr( FsmAp *other ) +{ + /* For the merging process. */ + MergeData md; + + /* Build a state set consisting of both start states */ + StateSet startStateSet; + startStateSet.insert( startState ); + startStateSet.insert( other->startState ); + + /* Both of the original start states loose their start state status. */ + unsetStartState(); + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other + * into this. No states will be deleted. 
*/
+	stateList.append( other->stateList );
+	misfitList.append( other->misfitList );
+
+	/* Move the final set data from other into this. */
+	finStateSet.insert(other->finStateSet);
+	other->finStateSet.empty();
+
+	/* Since other's list is empty, we can delete the fsm without
+	 * affecting any states. */
+	delete other;
+
+	/* Create a new start state. */
+	setStartState( addState() );
+
+	/* Merge the two former start states into the new start state. */
+	mergeStates( md, startState, startStateSet.data, startStateSet.length() );
+
+	/* Fill in any new states made from merging. */
+	fillInStates( md );
+}
+
+/* Unions other with this machine. Other is deleted. */
+void FsmAp::unionOp( FsmAp *other )
+{
+	/* Turn on misfit accounting for both graphs. */
+	setMisfitAccounting( true );
+	other->setMisfitAccounting( true );
+
+	/* Call Worker routine. */
+	doOr( other );
+
+	/* Remove the misfits and turn off misfit accounting. */
+	removeMisfits();
+	setMisfitAccounting( false );
+}
+
+/* Intersects other with this machine. Other is deleted. */
+void FsmAp::intersectOp( FsmAp *other )
+{
+	/* Turn on misfit accounting for both graphs. */
+	setMisfitAccounting( true );
+	other->setMisfitAccounting( true );
+
+	/* Set the fin bits on this and other to want each other: our final
+	 * states are tagged SB_GRAPH1 and other's are tagged SB_GRAPH2. */
+	setFinBits( SB_GRAPH1 );
+	other->setFinBits( SB_GRAPH2 );
+
+	/* Call worker Or routine. */
+	doOr( other );
+
+	/* Unset any final states that are no longer to
+	 * be final due to final bits. */
+	unsetIncompleteFinals();
+
+	/* Remove the misfits and turn off misfit accounting. */
+	removeMisfits();
+	setMisfitAccounting( false );
+
+	/* Remove states that have no path to a final state. */
+	removeDeadEndStates();
+}
+
+/* Set subtracts other machine from this machine. Other is deleted. */
+void FsmAp::subtractOp( FsmAp *other )
+{
+	/* Turn on misfit accounting for both graphs. */
+	setMisfitAccounting( true );
+	other->setMisfitAccounting( true );
+
+	/* Set the fin bits of other to be killers. 
*/
+	other->setFinBits( SB_GRAPH1 );
+
+	/* Call worker Or routine. */
+	doOr( other );
+
+	/* Unset any final states that are no longer to
+	 * be final due to final bits. */
+	unsetKilledFinals();
+
+	/* Remove the misfits and turn off misfit accounting. */
+	removeMisfits();
+	setMisfitAccounting( false );
+
+	/* Remove states that have no path to a final state. */
+	removeDeadEndStates();
+}
+/* Returns true if state is the target of some element of eptVect. A null
+ * eptVect is treated as containing nothing. */
+bool FsmAp::inEptVect( EptVect *eptVect, StateAp *state )
+{
+	if ( eptVect != 0 ) {
+		/* Vect is there, walk it looking for state. */
+		for ( int i = 0; i < eptVect->length(); i++ ) {
+			if ( eptVect->data[i].targ == state )
+				return true;
+		}
+	}
+	return false;
+}
+
+/* Fill epsilon vectors in a root state from a given starting point. Employs
+ * a depth first search through the graph of epsilon transitions. The
+ * parentLeaving flag is carried down the recursion so that a target reached
+ * through a leaving step is itself recorded as leaving. */
+void FsmAp::epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving )
+{
+	/* Walk the epsilon transitions out of the state. */
+	for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) {
+		/* Find the entry point, if it does not resolve, ignore it. */
+		EntryMapEl *enLow, *enHigh;
+		if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) {
+			/* Loop the targets. */
+			for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) {
+				/* Do not add the root or states already in eptVect.
+				 * NOTE(review): the test below compares targ against from,
+				 * not root; the two are equal only on the outermost call.
+				 * Confirm this is intended. */
+				StateAp *targ = en->value;
+				if ( targ != from && !inEptVect(root->eptVect, targ) ) {
+					/* Maybe need to create the eptVect. */
+					if ( root->eptVect == 0 )
+						root->eptVect = new EptVect();
+
+					/* If moving to a different graph or if any parent is
+					 * leaving then we are leaving. */
+					bool leaving = parentLeaving ||
+						root->owningGraph != targ->owningGraph;
+
+					/* All ok, add the target epsilon and recurse. */
+					root->eptVect->append( EptVectEl(targ, leaving) );
+					epsilonFillEptVectFrom( root, targ, leaving );
+				}
+			}
+		}
+	}
+}
+/* For every epsilon target that itself has an eptVect, substitute a shadow
+ * copy of the target in the eptVect that refers to it. */
+void FsmAp::shadowReadWriteStates( MergeData &md )
+{
+	/* Init isolatedShadow algorithm data. 
*/
+	for ( StateList::Iter st = stateList; st.lte(); st++ )
+		st->isolatedShadow = 0;
+
+	/* Any states that may be both read from and written to must
+	 * be shadowed. */
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		/* Find such states by looping through the eptVect lists, which give
+		 * us the states that will be read from. May cause us to visit the
+		 * states that we are interested in more than once. */
+		if ( st->eptVect != 0 ) {
+			/* For all states that will be read from. */
+			for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
+				/* Check for read and write to the same state. */
+				StateAp *targ = ept->targ;
+				if ( targ->eptVect != 0 ) {
+					/* State is to be written to, if the shadow is not already
+					 * there, create it. */
+					if ( targ->isolatedShadow == 0 ) {
+						StateAp *shadow = addState();
+						mergeStates( md, shadow, targ );
+						targ->isolatedShadow = shadow;
+					}
+
+					/* Write shadow into the state vector so that it is the
+					 * state that the epsilon transition will read from. */
+					ept->targ = targ->isolatedShadow;
+				}
+			}
+		}
+	}
+}
+/* Computes the epsilon targets of every state, merges those targets into the
+ * state, then deletes the target vectors and clears the epsilon transitions. */
+void FsmAp::resolveEpsilonTrans( MergeData &md )
+{
+	/* Walk the state list and invoke recursive worker on each state. */
+	for ( StateList::Iter st = stateList; st.lte(); st++ )
+		epsilonFillEptVectFrom( st, st, false );
+
+	/* Prevent reading from and writing to the same state. */
+	shadowReadWriteStates( md );
+
+	/* For all states that have epsilon transitions out, draw the transitions,
+	 * clear the epsilon transitions. */
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		/* If there is a state vector, then create the pre-merge state. */
+		if ( st->eptVect != 0 ) {
+			/* Merge all the epsilon targets into the state. Targets flagged
+			 * as leaving go through the leaving variant of the merge. */
+			for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) {
+				if ( ept->leaving )
+					mergeStatesLeaving( md, st, ept->targ );
+				else
+					mergeStates( md, st, ept->targ );
+			}
+
+			/* Clean up the target list. 
*/
+			delete st->eptVect;
+			st->eptVect = 0;
+		}
+
+		/* Clear the epsilon transitions vector. */
+		st->epsilonTrans.empty();
+	}
+}
+/* Resolves the epsilon transitions of this machine in place. All states are
+ * put in owning graph zero before the epsilon transitions are resolved. */
+void FsmAp::epsilonOp()
+{
+	/* For merging process. */
+	MergeData md;
+
+	setMisfitAccounting( true );
+
+	for ( StateList::Iter st = stateList; st.lte(); st++ )
+		st->owningGraph = 0;
+
+	/* Perform merges. */
+	resolveEpsilonTrans( md );
+
+	/* Epsilons can cause merges which leave behind unreachable states. */
+	fillInStates( md );
+
+	/* Remove the misfits and turn off misfit accounting. */
+	removeMisfits();
+	setMisfitAccounting( false );
+}
+
+/* Make a new machine by joining together a bunch of machines without making
+ * any transitions between them. A negative finalId results in there being no
+ * final id. Each machine in others is deleted. */
+void FsmAp::joinOp( int startId, int finalId, FsmAp **others, int numOthers )
+{
+	/* For the merging process. */
+	MergeData md;
+
+	/* Set the owning machines. Start at one. Zero is reserved for the start
+	 * and final states. */
+	for ( StateList::Iter st = stateList; st.lte(); st++ )
+		st->owningGraph = 1;
+	for ( int m = 0; m < numOthers; m++ ) {
+		for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ )
+			st->owningGraph = 2+m;
+	}
+
+	/* All machines lose start state status. */
+	unsetStartState();
+	for ( int m = 0; m < numOthers; m++ )
+		others[m]->unsetStartState();
+
+	/* Bring the other machines into this. */
+	for ( int m = 0; m < numOthers; m++ ) {
+		/* Bring in the rest of other's entry points. */
+		copyInEntryPoints( others[m] );
+		others[m]->entryPoints.empty();
+
+		/* Merge the lists. This will move all the states from other into
+		 * this. No states will be deleted. */
+		stateList.append( others[m]->stateList );
+		assert( others[m]->misfitList.length() == 0 );
+
+		/* Move the final set data from other into this. */
+		finStateSet.insert( others[m]->finStateSet );
+		others[m]->finStateSet.empty();
+
+		/* Since other's list is empty, we can delete the fsm without
+		 * affecting any states. 
*/
+		delete others[m];
+	}
+
+	/* Look up the start entry point. */
+	EntryMapEl *enLow = 0, *enHigh = 0;
+	bool findRes = entryPoints.findMulti( startId, enLow, enHigh );
+	if ( ! findRes ) {
+		/* No start state. Set a default one and proceed with the join. Note
+		 * that the result of the join will be a very uninteresting machine. */
+		setStartState( addState() );
+	}
+	else {
+		/* There is at least one start state, create a state that will become
+		 * the new start state. */
+		StateAp *newStart = addState();
+		setStartState( newStart );
+
+		/* The start state is in an owning machine class all its own. */
+		newStart->owningGraph = 0;
+
+		/* Create the set of states to merge from. */
+		StateSet stateSet;
+		for ( EntryMapEl *en = enLow; en <= enHigh; en++ )
+			stateSet.insert( en->value );
+
+		/* Merge in the set of start states into the new start state. */
+		mergeStates( md, newStart, stateSet.data, stateSet.length() );
+	}
+
+	/* Take a copy of the final state set, before unsetting them all. This
+	 * will allow us to call clearOutData on the states that don't get
+	 * final state status back. */
+	StateSet finStateSetCopy = finStateSet;
+
+	/* Now all final states are unset. */
+	unsetAllFinStates();
+
+	if ( finalId >= 0 ) {
+		/* Create the implicit final state. */
+		StateAp *finState = addState();
+		setFinState( finState );
+
+		/* Assign an entry into the final state on the final state entry id. Note
+		 * that there may already be an entry on this id. That's ok. Also set the
+		 * final state owning machine id. It's in a class all its own. */
+		setEntry( finalId, finState );
+		finState->owningGraph = 0;
+	}
+
+	/* Hand over to workers for resolving epsilon trans. This will merge states
+	 * with the targets of their epsilon transitions. */
+	resolveEpsilonTrans( md );
+
+	/* Invoke the relinquish final callback on any states that did not get
+	 * final state status back. 
*/
+	for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) {
+		if ( !((*st)->stateBits & SB_ISFINAL) )
+			clearOutData( *st );
+	}
+
+	/* Fill in any new states made from merging. */
+	fillInStates( md );
+
+	/* Joining can be messy. Instead of having misfit accounting on (which is
+	 * tricky here) do a full cleaning. */
+	removeUnreachableStates();
+}
+/* Absorbs the entry points, states and final states of every machine in
+ * others into this machine. No states are merged here. Each machine in
+ * others is deleted. */
+void FsmAp::globOp( FsmAp **others, int numOthers )
+{
+	/* All other machines lose start state status. */
+	for ( int m = 0; m < numOthers; m++ )
+		others[m]->unsetStartState();
+
+	/* Bring the other machines into this. */
+	for ( int m = 0; m < numOthers; m++ ) {
+		/* Bring in the rest of other's entry points. */
+		copyInEntryPoints( others[m] );
+		others[m]->entryPoints.empty();
+
+		/* Merge the lists. This will move all the states from other into
+		 * this. No states will be deleted. */
+		stateList.append( others[m]->stateList );
+		assert( others[m]->misfitList.length() == 0 );
+
+		/* Move the final set data from other into this. */
+		finStateSet.insert( others[m]->finStateSet );
+		others[m]->finStateSet.empty();
+
+		/* Since other's list is empty, we can delete the fsm without
+		 * affecting any states. */
+		delete others[m];
+	}
+}
+/* Rebuilds the entry map so that each entry key refers to one state. A key
+ * that had several states gets a new state with all of them merged in. */
+void FsmAp::deterministicEntry()
+{
+	/* For the merging process. */
+	MergeData md;
+
+	/* States may lose their entry points, turn on misfit accounting. */
+	setMisfitAccounting( true );
+
+	/* Get a copy of the entry map then clear all the entry points. As we
+	 * iterate the old entry map finding duplicates we will add the entry
+	 * points for the new states that we create. */
+	EntryMap prevEntry = entryPoints;
+	unsetAllEntryPoints();
+
+	for ( int enId = 0; enId < prevEntry.length(); ) {
+		/* Count the number of states on this entry key. 
*/
+		int highId = enId;
+		while ( highId < prevEntry.length() && prevEntry[enId].key == prevEntry[highId].key )
+			highId += 1;
+
+		int numIds = highId - enId;
+		if ( numIds == 1 ) {
+			/* Only a single entry point, just set the entry. */
+			setEntry( prevEntry[enId].key, prevEntry[enId].value );
+		}
+		else {
+			/* Multiple entry points, need to create a new state and merge in
+			 * all the targets of entry points. */
+			StateAp *newEntry = addState();
+			for ( int en = enId; en < highId; en++ )
+				mergeStates( md, newEntry, prevEntry[en].value );
+
+			/* Add the new state as the single entry point. */
+			setEntry( prevEntry[enId].key, newEntry );
+		}
+
+		enId += numIds;
+	}
+
+	/* The old start state may be unreachable. Remove the misfits and turn off
+	 * misfit accounting. */
+	removeMisfits();
+	setMisfitAccounting( false );
+}
+
+/* Unset any final states that are no longer to be final due to final bits.
+ * A final state carrying SB_GRAPH1 (the killer bit set by subtractOp) loses
+ * final status. SB_GRAPH1 is then cleared on every state visited. */
+void FsmAp::unsetKilledFinals()
+{
+	/* Duplicate the final state set before we begin modifying it. */
+	StateSet fin( finStateSet );
+
+	for ( int s = 0; s < fin.length(); s++ ) {
+		/* Check for killing bit. */
+		StateAp *state = fin.data[s];
+		if ( state->stateBits & SB_GRAPH1 ) {
+			/* One final state is a killer, set to non-final. */
+			unsetFinState( state );
+		}
+
+		/* Clear all killing bits. Non final states should never have had those
+		 * state bits set in the first place. */
+		state->stateBits &= ~SB_GRAPH1;
+	}
+}
+
+/* Unset any final states that are no longer to be final due to final bits.
+ * A final state carrying exactly one of SB_GRAPH1 and SB_GRAPH2 loses final
+ * status; carrying both, or neither, leaves it final. */
+void FsmAp::unsetIncompleteFinals()
+{
+	/* Duplicate the final state set before we begin modifying it. */
+	StateSet fin( finStateSet );
+
+	for ( int s = 0; s < fin.length(); s++ ) {
+		/* Check for one set but not the other. */
+		StateAp *state = fin.data[s];
+		if ( state->stateBits & SB_BOTH &&
+				(state->stateBits & SB_BOTH) != SB_BOTH )
+		{
+			/* One state wants the other but it is not there. */
+			unsetFinState( state );
+		}
+
+		/* Clear wanting bits. 
Non final states should never have had those
+		 * state bits set in the first place. */
+		state->stateBits &= ~SB_BOTH;
+	}
+}
+
+/* Ensure that the start state is free of entry points (aside from the fact
+ * that it is the start state). If the start state has entry points then make a
+ * new start state by merging with the old one. Useful before modifying start
+ * transitions. If the existing start state has any entry points other than the
+ * start state entry then modifying its transitions changes more than the start
+ * transitions. So isolate the start state by separating it out such that it
+ * only has start stateness as its entry point. */
+void FsmAp::isolateStartState( )
+{
+	/* For the merging process. */
+	MergeData md;
+
+	/* Bail out if the start state is already isolated. */
+	if ( isStartStateIsolated() )
+		return;
+
+	/* Turn on misfit accounting to possibly catch the old start state. */
+	setMisfitAccounting( true );
+
+	/* This will be the new start state. The existing start
+	 * state is merged with it. */
+	StateAp *prevStartState = startState;
+	unsetStartState();
+	setStartState( addState() );
+
+	/* Merge the new start state with the old one to isolate it. */
+	mergeStates( md, startState, prevStartState );
+
+	/* The stfill list and stateDict will be empty because the merging of the
+	 * old start state into the new one will not have any conflicting
+	 * transitions. */
+	assert( md.stateDict.treeSize == 0 );
+	assert( md.stfillHead == 0 );
+
+	/* The old start state may be unreachable. Remove the misfits and turn off
+	 * misfit accounting. */
+	removeMisfits();
+	setMisfitAccounting( false );
+}
+
+#ifdef LOG_CONDS /* Logging helpers, compiled only when LOG_CONDS is defined. */
+void logCondSpace( CondSpace *condSpace )
+{
+	if ( condSpace == 0 )
+		cerr << "<empty>";
+	else {
+		for ( CondSet::Iter csi = condSpace->condSet.last(); csi.gtb(); csi-- ) {
+			if ( ! 
csi.last() )
+				cerr << ',';
+			(*csi)->actionName( cerr );
+		}
+	}
+}
+/* Writes the fields of a newly created expansion to cerr. */
+void logNewExpansion( Expansion *exp )
+{
+	cerr << "created expansion:" << endl;
+	cerr << " range: " << exp->lowKey.getVal() << " .. " <<
+			exp->highKey.getVal() << endl;
+
+	cerr << " fromCondSpace: ";
+	logCondSpace( exp->fromCondSpace );
+	cerr << endl;
+	cerr << " fromVals: " << exp->fromVals << endl;
+
+	cerr << " toCondSpace: ";
+	logCondSpace( exp->toCondSpace );
+	cerr << endl;
+	cerr << " toValsList: ";
+	for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ )
+		cerr << " " << *to;
+	cerr << endl;
+}
+#endif
+
+/* For every key range where an out transition of destState overlaps a state
+ * condition of srcState, append an Expansion to expansionList. The expansion
+ * holds a copy of the transition (with fromState cleared) and lists every
+ * value, 0 up to (1 << number of conditions) - 1, of the source condition
+ * space as a target. */
+void FsmAp::findTransExpansions( ExpansionList &expansionList,
+		StateAp *destState, StateAp *srcState )
+{
+	PairIter<TransAp, StateCond> transCond( destState->outList.head,
+			srcState->stateCondList.head );
+	for ( ; !transCond.end(); transCond++ ) {
+		if ( transCond.userState == RangeOverlap ) {
+			Expansion *expansion = new Expansion( transCond.s1Tel.lowKey,
+					transCond.s1Tel.highKey );
+			expansion->fromTrans = new TransAp(*transCond.s1Tel.trans);
+			expansion->fromTrans->fromState = 0;
+			expansion->fromTrans->toState = transCond.s1Tel.trans->toState;
+			expansion->fromCondSpace = 0;
+			expansion->fromVals = 0;
+			CondSpace *srcCS = transCond.s2Tel.trans->condSpace;
+			expansion->toCondSpace = srcCS;
+
+			long numTargVals = (1 << srcCS->condSet.length());
+			for ( long targVals = 0; targVals < numTargVals; targVals++ )
+				expansion->toValsList.append( targVals );
+
+			#ifdef LOG_CONDS
+			logNewExpansion( expansion );
+			#endif
+			expansionList.append( expansion );
+		}
+	}
+}
+/* Looks in state's out transitions for those overlapping the key range
+ * lowKey..highKey as positioned in fromCondSpace for the value fromVals, and
+ * appends an Expansion to expansionList for each overlap found. */
+void FsmAp::findCondExpInTrans( ExpansionList &expansionList, StateAp *state,
+		Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace,
+		long fromVals, LongVect &toValsList )
+{
+	TransAp searchTrans;
+	searchTrans.lowKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize()
+			+ (lowKey - keyOps->minKey);
+	searchTrans.highKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() 
+			+ (highKey - keyOps->minKey);
+	searchTrans.prev = searchTrans.next = 0;
+
+	PairIter<TransAp> pairIter( state->outList.head, &searchTrans );
+	for ( ; !pairIter.end(); pairIter++ ) {
+		if ( pairIter.userState == RangeOverlap ) {
+			Expansion *expansion = new Expansion( lowKey, highKey );
+			expansion->fromTrans = new TransAp(*pairIter.s1Tel.trans);
+			expansion->fromTrans->fromState = 0;
+			expansion->fromTrans->toState = pairIter.s1Tel.trans->toState;
+			expansion->fromCondSpace = fromCondSpace;
+			expansion->fromVals = fromVals;
+			expansion->toCondSpace = toCondSpace;
+			expansion->toValsList = toValsList;
+
+			expansionList.append( expansion );
+			#ifdef LOG_CONDS
+			logNewExpansion( expansion );
+			#endif
+		}
+	}
+}
+/* For every key range where state conditions of destState and srcState
+ * overlap and srcState has conditions that destState lacks, find the
+ * expansions needed to move destState's transitions on that range into the
+ * merged condition space. */
+void FsmAp::findCondExpansions( ExpansionList &expansionList,
+		StateAp *destState, StateAp *srcState )
+{
+	PairIter<StateCond, StateCond> condCond( destState->stateCondList.head,
+			srcState->stateCondList.head );
+	for ( ; !condCond.end(); condCond++ ) {
+		if ( condCond.userState == RangeOverlap ) {
+			/* Loop over all existing condVals. */
+			CondSet &destCS = condCond.s1Tel.trans->condSpace->condSet;
+			long destLen = destCS.length();
+
+			/* Find the items in src cond set that are not in dest
+			 * cond set. These are the items that we must expand. */
+			CondSet srcOnlyCS = condCond.s2Tel.trans->condSpace->condSet;
+			for ( CondSet::Iter dcsi = destCS; dcsi.lte(); dcsi++ )
+				srcOnlyCS.remove( *dcsi );
+			long srcOnlyLen = srcOnlyCS.length();
+
+			if ( srcOnlyCS.length() > 0 ) {
+				#ifdef LOG_CONDS
+				cerr << "there are " << srcOnlyCS.length() << " item(s) that are "
+							"only in the srcCS" << endl;
+				#endif
+
+				CondSet mergedCS = destCS;
+				mergedCS.insert( condCond.s2Tel.trans->condSpace->condSet );
+
+				CondSpace *fromCondSpace = addCondSpace( destCS );
+				CondSpace *toCondSpace = addCondSpace( mergedCS );
+
+				/* Loop all values in the dest space. 
*/
+				for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) {
+					/* Re-express destVals, whose bits are indexed by position
+					 * in destCS, as bits indexed by position in mergedCS. */
+					long basicVals = 0;
+					for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) {
+						if ( destVals & (1 << csi.pos()) ) {
+							Action **cim = mergedCS.find( *csi );
+							long bitPos = (cim - mergedCS.data);
+							basicVals |= 1 << bitPos;
+						}
+					}
+
+					/* Loop all new values: every combination of the src-only
+					 * conditions is or'd onto basicVals. */
+					LongVect expandToVals;
+					for ( long soVals = 0; soVals < (1 << srcOnlyLen); soVals++ ) {
+						long targVals = basicVals;
+						for ( CondSet::Iter csi = srcOnlyCS; csi.lte(); csi++ ) {
+							if ( soVals & (1 << csi.pos()) ) {
+								Action **cim = mergedCS.find( *csi );
+								long bitPos = (cim - mergedCS.data);
+								targVals |= 1 << bitPos;
+							}
+						}
+						expandToVals.append( targVals );
+					}
+
+					findCondExpInTrans( expansionList, destState,
+							condCond.s1Tel.lowKey, condCond.s1Tel.highKey,
+							fromCondSpace, toCondSpace, destVals, expandToVals );
+				}
+			}
+		}
+	}
+}
+/* For each expansion and each of its target values, re-keys the expansion's
+ * copied transition into the target condition space and copies it into
+ * destState. */
+void FsmAp::doExpand( MergeData &md, StateAp *destState, ExpansionList &expList1 )
+{
+	for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) {
+		for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ ) {
+			long targVals = *to;
+
+			/* We will use the copy of the transition that was made when the
+			 * expansion was created. It will get used multiple times. Each
+			 * time we must set up the keys, everything else is constant and
+			 * and already prepared. 
*/
+			TransAp *srcTrans = exp->fromTrans;
+
+			srcTrans->lowKey = exp->toCondSpace->baseKey +
+					targVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey);
+			srcTrans->highKey = exp->toCondSpace->baseKey +
+					targVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey);
+
+			TransList srcList;
+			srcList.append( srcTrans );
+			outTransCopy( md, destState, srcList.head );
+			srcList.abandon();
+		}
+	}
+}
+
+/* For each expansion, removes from destState's out list the part of any
+ * transition that lies in the expansion's original (pre-expansion) key range.
+ * Transitions outside that range are kept. */
+void FsmAp::doRemove( MergeData &md, StateAp *destState, ExpansionList &expList1 )
+{
+	for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) {
+		Removal removal;
+		if ( exp->fromCondSpace == 0 ) {
+			removal.lowKey = exp->lowKey;
+			removal.highKey = exp->highKey;
+		}
+		else {
+			removal.lowKey = exp->fromCondSpace->baseKey +
+					exp->fromVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey);
+			removal.highKey = exp->fromCondSpace->baseKey +
+					exp->fromVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey);
+		}
+		removal.next = 0;
+
+		TransList destList;
+		PairIter<TransAp, Removal> pairIter( destState->outList.head, &removal );
+		for ( ; !pairIter.end(); pairIter++ ) {
+			switch ( pairIter.userState ) {
+			case RangeInS1: { /* Only in the out list: keep it, with the range reported by the iterator. */
+				TransAp *destTrans = pairIter.s1Tel.trans;
+				destTrans->lowKey = pairIter.s1Tel.lowKey;
+				destTrans->highKey = pairIter.s1Tel.highKey;
+				destList.append( destTrans );
+				break;
+			}
+			case RangeInS2:
+				break;
+			case RangeOverlap: { /* Inside the removal range: detach and delete. */
+				TransAp *trans = pairIter.s1Tel.trans;
+				detachTrans( trans->fromState, trans->toState, trans );
+				delete trans;
+				break;
+			}
+			case BreakS1: { /* Out transition is being split: continue with a duplicate. */
+				pairIter.s1Tel.trans = dupTrans( destState,
+						pairIter.s1Tel.trans );
+				break;
+			}
+			case BreakS2:
+				break;
+			}
+		}
+		destState->outList.transfer( destList );
+	}
+}
+/* Rebuilds destState's state condition list from the pairing of its own list
+ * with srcState's list. */
+void FsmAp::mergeStateConds( StateAp *destState, StateAp *srcState )
+{
+	StateCondList destList;
+	PairIter<StateCond> pairIter( destState->stateCondList.head,
+			srcState->stateCondList.head );
+	for ( ; !pairIter.end(); pairIter++ ) {
+		switch ( pairIter.userState ) {
+			case RangeInS1: {
+				
StateCond *destCond = pairIter.s1Tel.trans; /* Only in dest: keep it, with the range reported by the iterator. */
+				destCond->lowKey = pairIter.s1Tel.lowKey;
+				destCond->highKey = pairIter.s1Tel.highKey;
+				destList.append( destCond );
+				break;
+			}
+			case RangeInS2: { /* Only in src: add a copy of the src cond to dest. */
+				StateCond *newCond = new StateCond( *pairIter.s2Tel.trans );
+				newCond->lowKey = pairIter.s2Tel.lowKey;
+				newCond->highKey = pairIter.s2Tel.highKey;
+				destList.append( newCond );
+				break;
+			}
+			case RangeOverlap: { /* In both: the dest cond takes the union of the two cond sets. */
+				StateCond *destCond = pairIter.s1Tel.trans;
+				StateCond *srcCond = pairIter.s2Tel.trans;
+				CondSet mergedCondSet;
+				mergedCondSet.insert( destCond->condSpace->condSet );
+				mergedCondSet.insert( srcCond->condSpace->condSet );
+				destCond->condSpace = addCondSpace( mergedCondSet );
+
+				destCond->lowKey = pairIter.s1Tel.lowKey;
+				destCond->highKey = pairIter.s1Tel.highKey;
+				destList.append( destCond );
+				break;
+			}
+			case BreakS1: /* Dest cond is being split: continue with a copy. */
+				pairIter.s1Tel.trans = new StateCond( *pairIter.s1Tel.trans );
+				break;
+
+			case BreakS2:
+				break;
+		}
+	}
+	destState->stateCondList.transfer( destList );
+}
+
+/* A state merge which represents the drawing in of leaving transitions. If
+ * there is any out data then we duplicate the souce state, transfer the out
+ * data, then merge in the state. The new state will be reaped because it will
+ * not be given any in transitions. 
*/
+void FsmAp::mergeStatesLeaving( MergeData &md, StateAp *destState, StateAp *srcState )
+{
+	if ( !hasOutData( destState ) )
+		mergeStates( md, destState, srcState );
+	else {
+		/* Work on a fresh copy of srcState so the source itself is not
+		 * modified when destState's out data is applied. */
+		StateAp *ssMutable = addState();
+		mergeStates( md, ssMutable, srcState );
+		transferOutData( ssMutable, destState );
+
+		for ( ActionSet::Iter cond = destState->outCondSet; cond.lte(); cond++ )
+			embedCondition( md, ssMutable, *cond );
+
+		mergeStates( md, destState, ssMutable );
+	}
+}
+/* Merges each of the numSrc states in srcStates into destState, in order. */
+void FsmAp::mergeStates( MergeData &md, StateAp *destState,
+		StateAp **srcStates, int numSrc )
+{
+	for ( int s = 0; s < numSrc; s++ )
+		mergeStates( md, destState, srcStates[s] );
+}
+/* Merges srcState into destState: state conditions, out transitions, state
+ * bits, final state status, epsilon transitions and the action tables. */
+void FsmAp::mergeStates( MergeData &md, StateAp *destState, StateAp *srcState )
+{
+	ExpansionList expList1;
+	ExpansionList expList2;
+
+	findTransExpansions( expList1, destState, srcState );
+	findCondExpansions( expList1, destState, srcState );
+	findTransExpansions( expList2, srcState, destState );
+	findCondExpansions( expList2, srcState, destState );
+
+	mergeStateConds( destState, srcState );
+
+	outTransCopy( md, destState, srcState->outList.head );
+
+	doExpand( md, destState, expList1 );
+	doExpand( md, destState, expList2 );
+
+	doRemove( md, destState, expList1 );
+	doRemove( md, destState, expList2 );
+
+	expList1.empty();
+	expList2.empty();
+
+	/* Get its bits and final state status. The SB_ISFINAL bit is masked out
+	 * of the copy and final status goes through setFinState instead. */
+	destState->stateBits |= ( srcState->stateBits & ~SB_ISFINAL );
+	if ( srcState->isFinState() )
+		setFinState( destState );
+
+	/* Draw in any properties of srcState into destState. */
+	if ( srcState == destState ) {
+		/* Duplicate the list to protect against write to source. The
+		 * priorities sets are not copied in because that would have no
+		 * effect. */
+		destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) );
+
+		/* Get all actions, duplicating to protect against write to source. 
*/
+		destState->toStateActionTable.setActions(
+				ActionTable( srcState->toStateActionTable ) );
+		destState->fromStateActionTable.setActions(
+				ActionTable( srcState->fromStateActionTable ) );
+		destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) );
+		destState->outCondSet.insert( ActionSet( srcState->outCondSet ) );
+		destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) );
+		destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) );
+	}
+	else {
+		/* Get the epsilons, out priorities. */
+		destState->epsilonTrans.append( srcState->epsilonTrans );
+		destState->outPriorTable.setPriors( srcState->outPriorTable );
+
+		/* Get all actions. */
+		destState->toStateActionTable.setActions( srcState->toStateActionTable );
+		destState->fromStateActionTable.setActions( srcState->fromStateActionTable );
+		destState->outActionTable.setActions( srcState->outActionTable );
+		destState->outCondSet.insert( srcState->outCondSet );
+		destState->errActionTable.setActions( srcState->errActionTable );
+		destState->eofActionTable.setActions( srcState->eofActionTable );
+	}
+}
+/* Walks the stfill list in md, merging into each listed state the states in
+ * its stateDictEl state set, then deletes every stateDictEl on the list. */
+void FsmAp::fillInStates( MergeData &md )
+{
+	/* Merge any states that are awaiting merging. This will likely cause
+	 * other states to be added to the stfill list. */
+	StateAp *state = md.stfillHead;
+	while ( state != 0 ) {
+		StateSet *stateSet = &state->stateDictEl->stateSet;
+		mergeStates( md, state, stateSet->data, stateSet->length() );
+		state = state->alg.next;
+	}
+
+	/* Delete the state sets of all states that are on the fill list. */
+	state = md.stfillHead;
+	while ( state != 0 ) {
+		/* Delete and reset the state set. */
+		delete state->stateDictEl;
+		state->stateDictEl = 0;
+
+		/* Next state in the stfill list. */
+		state = state->alg.next;
+	}
+
+	/* StateDict will still have its ptrs/size set but all of it's element
+	 * will be deleted so we don't need to clean it up. 
*/
+}
+/* Finds the expansions needed to embed condAction into destState and
+ * rebuilds destState's state condition list to include the condition. */
+void FsmAp::findEmbedExpansions( ExpansionList &expansionList,
+		StateAp *destState, Action *condAction )
+{
+	StateCondList destList;
+	PairIter<TransAp, StateCond> transCond( destState->outList.head,
+			destState->stateCondList.head );
+	for ( ; !transCond.end(); transCond++ ) {
+		switch ( transCond.userState ) {
+			case RangeInS1: {
+				/* An out transition with no state condition on this range.
+				 * Only ranges starting at or below maxKey are handled. */
+				if ( transCond.s1Tel.lowKey <= keyOps->maxKey ) {
+					assert( transCond.s1Tel.highKey <= keyOps->maxKey );
+
+					/* Make a new state cond. */
+					StateCond *newStateCond = new StateCond( transCond.s1Tel.lowKey,
+							transCond.s1Tel.highKey );
+					newStateCond->condSpace = addCondSpace( CondSet( condAction ) );
+					destList.append( newStateCond );
+
+					/* Create the expansion. Its only target value is 1. */
+					Expansion *expansion = new Expansion( transCond.s1Tel.lowKey,
+							transCond.s1Tel.highKey );
+					expansion->fromTrans = new TransAp(*transCond.s1Tel.trans);
+					expansion->fromTrans->fromState = 0;
+					expansion->fromTrans->toState = transCond.s1Tel.trans->toState;
+					expansion->fromCondSpace = 0;
+					expansion->fromVals = 0;
+					expansion->toCondSpace = newStateCond->condSpace;
+					expansion->toValsList.append( 1 );
+					#ifdef LOG_CONDS
+					logNewExpansion( expansion );
+					#endif
+					expansionList.append( expansion );
+				}
+				break;
+			}
+			case RangeInS2: {
+				/* Enhance state cond and find the expansion. */
+				StateCond *stateCond = transCond.s2Tel.trans;
+				stateCond->lowKey = transCond.s2Tel.lowKey;
+				stateCond->highKey = transCond.s2Tel.highKey;
+
+				CondSet &destCS = stateCond->condSpace->condSet;
+				long destLen = destCS.length();
+				CondSpace *fromCondSpace = stateCond->condSpace;
+
+				CondSet mergedCS = destCS;
+				mergedCS.insert( condAction );
+				CondSpace *toCondSpace = addCondSpace( mergedCS );
+				stateCond->condSpace = toCondSpace;
+				destList.append( stateCond );
+
+				/* Loop all values in the dest space. 
*/
+				for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) {
+					/* Re-express destVals, whose bits are indexed by position
+					 * in destCS, as bits indexed by position in mergedCS. */
+					long basicVals = 0;
+					for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) {
+						if ( destVals & (1 << csi.pos()) ) {
+							Action **cim = mergedCS.find( *csi );
+							long bitPos = (cim - mergedCS.data);
+							basicVals |= 1 << bitPos;
+						}
+					}
+
+					long targVals = basicVals;
+					Action **cim = mergedCS.find( condAction );
+					long bitPos = (cim - mergedCS.data);
+					targVals |= 1 << bitPos;
+
+					LongVect expandToVals( targVals );
+					findCondExpInTrans( expansionList, destState,
+							transCond.s2Tel.lowKey, transCond.s2Tel.highKey,
+							fromCondSpace, toCondSpace, destVals, expandToVals );
+				}
+				break;
+			}
+
+
+			case RangeOverlap:
+			case BreakS1:
+			case BreakS2:
+				assert( false );
+				break;
+		}
+	}
+
+	destState->stateCondList.transfer( destList );
+}
+/* Embeds condAction into state, then fills in any states created by merging
+ * and removes misfits. */
+void FsmAp::embedCondition( StateAp *state, Action *condAction )
+{
+	MergeData md;
+	ExpansionList expList; /* NOTE(review): not referenced again in this overload. */
+
+	/* Turn on misfit accounting to possibly catch the old start state. */
+	setMisfitAccounting( true );
+
+	/* Worker. */
+	embedCondition( md, state, condAction );
+
+	/* Fill in any states that were newed up as combinations of others. */
+	fillInStates( md );
+
+	/* Remove the misfits and turn off misfit accounting. */
+	removeMisfits();
+	setMisfitAccounting( false );
+}
+/* Worker: finds the embedding expansions for condAction, then expands and
+ * removes transitions in state accordingly. */
+void FsmAp::embedCondition( MergeData &md, StateAp *state, Action *condAction )
+{
+	ExpansionList expList;
+
+	findEmbedExpansions( expList, state, condAction );
+	doExpand( md, state, expList );
+	doRemove( md, state, expList );
+	expList.empty();
+}
+
+/* Check if a machine defines a single character. This is useful in validating
+ * ranges and machines to export. */
+bool FsmAp::checkSingleCharMachine()
+{
+	/* Must have two states. */
+	if ( stateList.length() != 2 )
+		return false;
+	/* The start state cannot be final. */
+	if ( startState->isFinState() )
+		return false;
+	/* There should be only one final state. 
*/
+	if ( finStateSet.length() != 1 )
+		return false;
+	/* The final state cannot have any transitions out. */
+	if ( finStateSet[0]->outList.length() != 0 )
+		return false;
+	/* The start state should have only one transition out. */
+	if ( startState->outList.length() != 1 )
+		return false;
+	/* The single transition out of the start state should not be a range. */
+	TransAp *startTrans = startState->outList.head;
+	if ( startTrans->lowKey != startTrans->highKey )
+		return false;
+	return true;
+}
+
diff --git a/contrib/tools/ragel5/ragel/fsmgraph.h b/contrib/tools/ragel5/ragel/fsmgraph.h
new file mode 100644
index 0000000000..062031c3aa
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/fsmgraph.h
@@ -0,0 +1,1482 @@
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _FSMGRAPH_H
+#define _FSMGRAPH_H
+
+#include <assert.h>
+#include <iostream>
+#include "common.h"
+#include "vector.h"
+#include "bstset.h"
+#include "compare.h"
+#include "avltree.h"
+#include "dlist.h"
+#include "bstmap.h"
+#include "sbstmap.h"
+#include "sbstset.h"
+#include "sbsttable.h"
+#include "avlset.h"
+#include "avlmap.h"
+#include "ragel.h"
+
+//#define LOG_CONDS + +/* Flags that control merging. 
*/ +#define SB_GRAPH1 0x01 +#define SB_GRAPH2 0x02 +#define SB_BOTH 0x03 +#define SB_ISFINAL 0x04 +#define SB_ISMARKED 0x08 +#define SB_ONLIST 0x10 + +using std::ostream; + +struct TransAp; +struct StateAp; +struct FsmAp; +struct Action; +struct LongestMatchPart; + +/* State list element for unambiguous access to list element. */ +struct FsmListEl +{ + StateAp *prev, *next; +}; + +/* This is the marked index for a state pair. Used in minimization. It keeps + * track of whether or not the state pair is marked. */ +struct MarkIndex +{ + MarkIndex(int states); + ~MarkIndex(); + + void markPair(int state1, int state2); + bool isPairMarked(int state1, int state2); + +private: + int numStates; + bool *array; +}; + +extern KeyOps *keyOps; + +/* Transition action element: maps an int ordering key to an Action pointer. */ +typedef SBstMapEl< int, Action* > ActionTableEl; + +/* Nodes in the tree that use this action. */ +struct NameInst; +struct InlineList; +typedef Vector<NameInst*> ActionRefs; + +/* Element in list of actions. Contains the string for the code to execute. + * An Action is both a list element and a tree element, so the same object + * can be linked into a DList and an AvlTree. */ +struct Action +: + public DListEl<Action>, + public AvlTreeEl<Action> +{ +public: + + Action( const InputLoc &loc, const char *name, InlineList *inlineList, int condId ) + : + loc(loc), + name(name), + inlineList(inlineList), + actionId(-1), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + numCondRefs(0), + anyCall(false), + isLmAction(false), + condId(condId) + { + } + + /* Key for action dictionary. */ + const char *getKey() const { return name; } + + /* Data collected during parse. */ + InputLoc loc; + const char *name; + InlineList *inlineList; + int actionId; + /* Write the action name to out, or line:col of its location when the + * action has no name. */ + void actionName( ostream &out ) + { + if ( name != 0 ) + out << name; + else + out << loc.line << ":" << loc.col; + } + + /* Places in the input text that reference the action. */ + ActionRefs actionRefs; + + /* Number of references in the final machine. 
*/ + int numRefs() + { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } /* numCondRefs is not part of this sum. */ + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + int numCondRefs; + bool anyCall; + + bool isLmAction; + int condId; +}; +/* Three-way compare of two condition actions by their condId. */ +struct CmpCondId +{ + static inline int compare( const Action *cond1, const Action *cond2 ) + { + if ( cond1->condId < cond2->condId ) + return -1; + else if ( cond1->condId > cond2->condId ) + return 1; + return 0; + } +}; + +/* A list of actions and a string-keyed dictionary of actions. */ +typedef DList<Action> ActionList; +typedef AvlTree<Action, char *, CmpStr> ActionDict; + +/* Structure for reverse action mapping. */ +struct RevActionMapEl +{ + char *name; + InputLoc location; +}; + + +/* Transition action table: Action pointers keyed by an int ordering. */ +struct ActionTable + : public SBstMap< int, Action*, CmpOrd<int> > +{ + void setAction( int ordering, Action *action ); + void setActions( int *orderings, Action **actions, int nActs ); + void setActions( const ActionTable &other ); + + bool hasAction( Action *action ); +}; + +typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet; +typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet; + +/* Longest-match action element: maps an int ordering key to a + * LongestMatchPart pointer. */ +typedef SBstMapEl< int, LongestMatchPart* > LmActionTableEl; + +/* Longest-match action table: LongestMatchPart pointers keyed by an int + * ordering. */ +struct LmActionTable + : public SBstMap< int, LongestMatchPart*, CmpOrd<int> > +{ + void setAction( int ordering, LongestMatchPart *action ); + void setActions( const LmActionTable &other ); +}; + +/* Compare of a whole action table element (key & value). The keys are + * compared first, then the Action pointer values. */ +struct CmpActionTableEl +{ + static int compare( const ActionTableEl &action1, + const ActionTableEl &action2 ) + { + if ( action1.key < action2.key ) + return -1; + else if ( action1.key > action2.key ) + return 1; + else if ( action1.value < action2.value ) + return -1; + else if ( action1.value > action2.value ) + return 1; + return 0; + } +}; + +/* Compare for ActionTable. 
*/ +typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable; + +/* Compare of a whole lm action table element (key & value). The keys are + * compared first, then the LongestMatchPart pointer values. */ +struct CmpLmActionTableEl +{ + static int compare( const LmActionTableEl &lmAction1, + const LmActionTableEl &lmAction2 ) + { + if ( lmAction1.key < lmAction2.key ) + return -1; + else if ( lmAction1.key > lmAction2.key ) + return 1; + else if ( lmAction1.value < lmAction2.value ) + return -1; + else if ( lmAction1.value > lmAction2.value ) + return 1; + return 0; + } +}; + +/* Compare for LmActionTable. */ +typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable; + +/* Action table element for error action tables. Adds the encoding of transfer + * point. */ +struct ErrActionTableEl +{ + ErrActionTableEl( Action *action, int ordering, int transferPoint ) + : ordering(ordering), action(action), transferPoint(transferPoint) { } + + /* Ordering and id of the action embedding. */ + int ordering; + Action *action; + + /* Id of the point of transfer from the error action table to transitions + * and eofActionTable. */ + int transferPoint; + + /* The table key is the ordering. */ int getKey() const { return ordering; } +}; +/* Error action table: ErrActionTableEl entries keyed by their int ordering. */ +struct ErrActionTable + : public SBstTable< ErrActionTableEl, int, CmpOrd<int> > +{ + void setAction( int ordering, Action *action, int transferPoint ); + void setActions( const ErrActionTable &other ); +}; + +/* Compare of an error action table element (key & value). Compares the + * ordering first, then the action pointer, then the transfer point. */ +struct CmpErrActionTableEl +{ + static int compare( const ErrActionTableEl &action1, + const ErrActionTableEl &action2 ) + { + if ( action1.ordering < action2.ordering ) + return -1; + else if ( action1.ordering > action2.ordering ) + return 1; + else if ( action1.action < action2.action ) + return -1; + else if ( action1.action > action2.action ) + return 1; + else if ( action1.transferPoint < action2.transferPoint ) + return -1; + else if ( action1.transferPoint > action2.transferPoint ) + return 1; + return 0; + } +}; + +/* Compare for ErrActionTable. 
*/ +typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable; + + +/* Describe a priority, shared among PriorEls. + * Holds the key and the priority value. */ +struct PriorDesc +{ + int key; + int priority; +}; + +/* Element in the arrays of priorities for transitions and arrays. Ordering is + * unique among instantiations of machines, desc is shared. */ +struct PriorEl +{ + PriorEl( int ordering, PriorDesc *desc ) + : ordering(ordering), desc(desc) { } + + int ordering; + PriorDesc *desc; +}; + +/* Compare priority elements, which are ordered by the priority descriptor + * key. Only desc->key takes part; the ordering field is ignored here. */ +struct PriorElCmp +{ + static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) + { + if ( pel1.desc->key < pel2.desc->key ) + return -1; + else if ( pel1.desc->key > pel2.desc->key ) + return 1; + else + return 0; + } +}; + + +/* Priority table: a set of PriorEl ordered by PriorElCmp. */ +struct PriorTable + : public SBstSet< PriorEl, PriorElCmp > +{ + void setPrior( int ordering, PriorDesc *desc ); + void setPriors( const PriorTable &other ); +}; + +/* Compare of prior table elements for distinguishing state data. Compares + * the descriptor pointers first, then the orderings. */ +struct CmpPriorEl +{ + static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) + { + if ( pel1.desc < pel2.desc ) + return -1; + else if ( pel1.desc > pel2.desc ) + return 1; + else if ( pel1.ordering < pel2.ordering ) + return -1; + else if ( pel1.ordering > pel2.ordering ) + return 1; + return 0; + } +}; + +/* Compare of PriorTable distinguising state data. Using a compare of the + * pointers is a little more strict than it needs be. It requires that + * prioritiy tables have the exact same set of priority assignment operators + * (from the input lang) to be considered equal. + * + * Really only key-value pairs need be tested and ordering be merged. However + * this would require that in the fuseing of states, priority descriptors be + * chosen for the new fused state based on priority. 
Since the out transition + * lists and ranges aren't necessarily going to line up, this is more work for + * little gain. Final compression resets all priorities first, so this would + * only be useful for compression at every operator, which is only an + * undocumented test feature. + */ +typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable; + +/* Plain action list that imposes no ordering. */ +typedef Vector<int> TransFuncList; + +/* Comparison for TransFuncList. */ +typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare; + +/* Transition class that implements actions and priorities. */ +struct TransAp +{ + TransAp() : fromState(0), toState(0) {} + /* Copy the keys, the action table and the priority table. The endpoint + * states are reset to null rather than copied, the list pointers are not + * set, and both longest-match tables are asserted to be empty. */ TransAp( const TransAp &other ) : + lowKey(other.lowKey), + highKey(other.highKey), + fromState(0), toState(0), + actionTable(other.actionTable), + priorTable(other.priorTable) + { + assert( lmActionTable.length() == 0 && other.lmActionTable.length() == 0 ); + } + + Key lowKey, highKey; + StateAp *fromState; + StateAp *toState; + + /* Pointers for outlist. */ + TransAp *prev, *next; + + /* Pointers for in-list. */ + TransAp *ilprev, *ilnext; + + /* The function table and priority for the transition. */ + ActionTable actionTable; + PriorTable priorTable; + + LmActionTable lmActionTable; +}; + +/* In transition list. Like DList except only has head pointers, which is all + * that is required. Insertion and deletion is handled by the graph. This + * class provides the iterator of a single list. */ +struct TransInList +{ + TransInList() : head(0) { } + + TransAp *head; + + struct Iter + { + /* Default construct. */ + Iter() : ptr(0) { } + + /* Construct, assign from a list. */ + Iter( const TransInList &il ) : ptr(il.head) { } + Iter &operator=( const TransInList &dl ) { ptr = dl.head; return *this; } + + /* lte() is true while the iterator points at an element; end() is true + * once the pointer is null. */ + bool lte() const { return ptr != 0; } + bool end() const { return ptr == 0; } + + /* At the first, last element. 
*/ + bool first() const { return ptr && ptr->ilprev == 0; } + bool last() const { return ptr && ptr->ilnext == 0; } + + /* Cast, dereference, arrow ops. */ + operator TransAp*() const { return ptr; } + TransAp &operator *() const { return *ptr; } + TransAp *operator->() const { return ptr; } + + /* Increment, decrement. These follow the in-list links (ilnext and + * ilprev) and do not check for a null pointer. */ + inline void operator++(int) { ptr = ptr->ilnext; } + inline void operator--(int) { ptr = ptr->ilprev; } + + /* The iterator is simply a pointer. */ + TransAp *ptr; + }; +}; + +typedef DList<TransAp> TransList; + +/* Set of states, list of states. */ +typedef BstSet<StateAp*> StateSet; +typedef DList<StateAp> StateList; + +/* An element in a state dict. The constructor copies the state set only; + * targState is left unset by it. */ +struct StateDictEl +: + public AvlTreeEl<StateDictEl> +{ + StateDictEl(const StateSet &stateSet) + : stateSet(stateSet) { } + + const StateSet &getKey() { return stateSet; } + StateSet stateSet; + StateAp *targState; +}; + +/* Dictionary mapping a set of states to a target state. */ +typedef AvlTree< StateDictEl, StateSet, CmpTable<StateAp*> > StateDict; + +/* Data needed for a merge operation: the state dictionary plus the head and + * tail pointers of the list of states still to be filled in. */ +struct MergeData +{ + MergeData() + : stfillHead(0), stfillTail(0) { } + + StateDict stateDict; + + StateAp *stfillHead; + StateAp *stfillTail; + + void fillListAppend( StateAp *state ); +}; +/* A key range together with a transition pointer. */ +struct TransEl +{ + /* Constructors. Only the three-argument form sets value. */ + TransEl() { } + TransEl( Key lowKey, Key highKey ) + : lowKey(lowKey), highKey(highKey) { } + TransEl( Key lowKey, Key highKey, TransAp *value ) + : lowKey(lowKey), highKey(highKey), value(value) { } + + Key lowKey, highKey; + TransAp *value; +}; +/* Three-way compare of two keys using the Key less-than and greater-than + * operators. */ +struct CmpKey +{ + static int compare( const Key key1, const Key key2 ) + { + if ( key1 < key2 ) + return -1; + else if ( key1 > key2 ) + return 1; + else + return 0; + } +}; + +/* Vector based set of key items. 
*/ +typedef BstSet<Key, CmpKey> KeySet; +/* A partition of states used during minimization: a list of states, an + * active flag and links to the neighbouring partitions. */ +struct MinPartition +{ + MinPartition() : active(false) { } + + StateList list; + bool active; + + MinPartition *prev, *next; +}; + +/* Epsilon transitions stored in a state, kept as a vector of int targets. */ +typedef Vector<int> EpsilonTrans; + +/* List of states that are to be drawn into this. */ +struct EptVectEl +{ + EptVectEl( StateAp *targ, bool leaving ) + : targ(targ), leaving(leaving) { } + + StateAp *targ; + bool leaving; +}; +typedef Vector<EptVectEl> EptVect; + +/* Set of entry ids that go into this state. */ +typedef BstSet<int> EntryIdSet; + +/* Set of longest match items that may be active in a given state. */ +typedef BstSet<LongestMatchPart*> LmItemSet; + +/* Conditions: a set of condition actions ordered by condId, and the compare + * for such sets. */ +typedef BstSet< Action*, CmpCondId > CondSet; +typedef CmpTable< Action*, CmpCondId > CmpCondSet; +/* A condition space, keyed in its tree by the condition set. The constructor + * copies the set only; baseKey and condSpaceId are left unset by it. */ +struct CondSpace + : public AvlTreeEl<CondSpace> +{ + CondSpace( const CondSet &condSet ) + : condSet(condSet) {} + + const CondSet &getKey() { return condSet; } + + CondSet condSet; + Key baseKey; + long condSpaceId; +}; + +typedef Vector<CondSpace*> CondSpaceVect; + +typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap; +/* A key range of a state paired with a condition space. The constructor sets + * the key range only; condSpace is left unset by it. */ +struct StateCond +{ + StateCond( Key lowKey, Key highKey ) : + lowKey(lowKey), highKey(highKey) {} + + Key lowKey; + Key highKey; + CondSpace *condSpace; + + StateCond *prev, *next; +}; + +typedef DList<StateCond> StateCondList; +typedef Vector<long> LongVect; +/* An expansion over a key range, from one condition space and value set to + * another. The constructor nulls the pointers; fromVals is left unset. */ +struct Expansion +{ + Expansion( Key lowKey, Key highKey ) : + lowKey(lowKey), highKey(highKey), + fromTrans(0), fromCondSpace(0), + toCondSpace(0) {} + /* The expansion deletes fromTrans when it is destroyed. */ + ~Expansion() + { + if ( fromTrans != 0 ) + delete fromTrans; + } + + Key lowKey; + Key highKey; + + TransAp *fromTrans; + CondSpace *fromCondSpace; + long fromVals; + + CondSpace *toCondSpace; + LongVect toValsList; + + Expansion *prev, *next; +}; + +typedef DList<Expansion> ExpansionList; +/* A key range to remove, singly linked through next. */ +struct Removal +{ + Key lowKey; + Key highKey; + + Removal *next; +}; + +struct CondData +{ + CondData() : 
nextCondKey(0) {} + + /* Condition info: the next condition key, starting at zero, and the map + * of condition spaces. */ + Key nextCondKey; + + CondSpaceMap condSpaceMap; +}; + +extern CondData *condData; + +/* State class that implements actions and priorities. */ +struct StateAp +{ + StateAp(); + StateAp(const StateAp &other); + ~StateAp(); + + /* Is the state final? Tests the SB_ISFINAL bit of stateBits. */ + bool isFinState() { return stateBits & SB_ISFINAL; } + + /* Out transition list. */ + TransList outList; + + /* In transition list. */ + TransInList inList; + + /* Entry points into the state. */ + EntryIdSet entryIds; + + /* Epsilon transitions. */ + EpsilonTrans epsilonTrans; + + /* Condition info. */ + StateCondList stateCondList; + + /* Number of in transitions from states other than ourselves. */ + int foreignInTrans; + + /* Temporary data for various algorithms. The members share storage, so + * only one of them is meaningful at a time. */ + union { + /* When duplicating the fsm we need to map each + * state to the new state representing it. */ + StateAp *stateMap; + + /* When minimizing machines by partitioning, this maps to the group + * the state is in. */ + MinPartition *partition; + + /* When merging states (state machine operations) this next pointer is + * used for the list of states that need to be filled in. */ + StateAp *next; + + /* Identification for printing and stable minimization. */ + int stateNum; + + } alg; + + /* Data used in epsilon operation, maybe fit into alg? */ + StateAp *isolatedShadow; + int owningGraph; + + /* A pointer to a dict element that contains the set of states this state + * represents. This cannot go into alg, because alg.next is used during + * the merging process. */ + StateDictEl *stateDictEl; + + /* When drawing epsilon transitions, holds the list of states to merge + * with. */ + EptVect *eptVect; + + /* Bits controlling the behaviour of the state during collapsing to dfa. */ + int stateBits; + + /* State list elements. */ + StateAp *next, *prev; + + /* + * Priority and Action data. + */ + + /* Out priorities transfered to out transitions. 
*/ + PriorTable outPriorTable; + + /* The following two action tables are distinguished by when they run. + * The toState actions are executed immediately after the transition + * actions of incoming transitions and the current character will be the + * same as the one available then. The fromState actions are executed + * immediately before the transition actions of outgoing transitions and + * the current character is the same as the one available then. */ + + /* Actions to execute upon entering into a state. */ + ActionTable toStateActionTable; + + /* Actions to execute when going from the state to the transition. */ + ActionTable fromStateActionTable; + + /* Actions to add to any future transitions that leave via this state. */ + ActionTable outActionTable; + + /* Conditions to add to any future transitions that leave via this state. */ + ActionSet outCondSet; + + /* Error action tables. */ + ErrActionTable errActionTable; + + /* Actions to execute on eof. */ + ActionTable eofActionTable; + + /* Set of longest match items that may be active in this state. */ + LmItemSet lmItemSet; +}; +/* Cursor over a list of items. Caches the key range of the current item and + * the pointer to the following one, so the cached range can be changed + * without touching the item. */ +template <class ListItem> struct NextTrans +{ + Key lowKey, highKey; + ListItem *trans; + ListItem *next; + /* Refresh next and the cached keys from trans. With a null trans only + * next is cleared and the keys keep their old values. */ + void load() { + if ( trans == 0 ) + next = 0; + else { + next = trans->next; + lowKey = trans->lowKey; + highKey = trans->highKey; + } + } + /* Point the cursor at t and load it. */ + void set( ListItem *t ) { + trans = t; + load(); + } + /* Move to the saved next item and load it. */ + void increment() { + trans = next; + load(); + } +}; + + +/* Encodes the different states that are meaningful to the user of the + * iterator. */ +enum PairIterUserState +{ + RangeInS1, RangeInS2, + RangeOverlap, + BreakS1, BreakS2 +}; + +template <class ListItem1, class ListItem2 = ListItem1> struct PairIter +{ + /* Encodes the different states that an fsm iterator can be in. 
*/ + enum IterState { + Begin, + ConsumeS1Range, ConsumeS2Range, + OnlyInS1Range, OnlyInS2Range, + S1SticksOut, S1SticksOutBreak, + S2SticksOut, S2SticksOutBreak, + S1DragsBehind, S1DragsBehindBreak, + S2DragsBehind, S2DragsBehindBreak, + ExactOverlap, End + }; + + PairIter( ListItem1 *list1, ListItem2 *list2 ); + + /* Query iterator. Both increment forms simply call findNext(). */ + bool lte() { return itState != End; } + bool end() { return itState == End; } + void operator++(int) { findNext(); } + void operator++() { findNext(); } + + /* Iterator state. itState is the resume point inside findNext and + * userState is the classification reported to the caller. */ + ListItem1 *list1; + ListItem2 *list2; + IterState itState; + PairIterUserState userState; + + NextTrans<ListItem1> s1Tel; + NextTrans<ListItem2> s2Tel; + Key bottomLow, bottomHigh; + ListItem1 *bottomTrans1; + ListItem2 *bottomTrans2; + +private: + void findNext(); +}; + +/* Init the iterator by advancing to the first item. userState is not + * initialized here; it is assigned by CO_RETURN2 inside findNext. */ +template <class ListItem1, class ListItem2> PairIter<ListItem1, ListItem2>::PairIter( + ListItem1 *list1, ListItem2 *list2 ) +: + list1(list1), + list2(list2), + itState(Begin) +{ + findNext(); +} + +/* Return and re-entry for the co-routine iterators. Records label in itState, + * returns, and defines the entry label that the next call jumps back to. + * This should ALWAYS be used inside of a block. */ +#define CO_RETURN(label) \ + itState = label; \ + return; \ + entry##label: backIn = true + +/* Same as CO_RETURN, and additionally stores uState in userState before + * returning. This should ALWAYS be used inside of a block. */ +#define CO_RETURN2(label, uState) \ + itState = label; \ + userState = uState; \ + return; \ + entry##label: backIn = true + +/* Advance to the next transition. When returns, trans points to the next + * transition, unless there are no more, in which case end() returns true. */ +template <class ListItem1, class ListItem2> void PairIter<ListItem1, ListItem2>::findNext() +{ + /* This variable is used in dummy statements that follow the entry + * goto labels. The compiler needs some statement to follow the label. */ + bool backIn; + + /* Jump into the iterator routine base on the iterator state. 
*/ + switch ( itState ) { + case Begin: goto entryBegin; + case ConsumeS1Range: goto entryConsumeS1Range; + case ConsumeS2Range: goto entryConsumeS2Range; + case OnlyInS1Range: goto entryOnlyInS1Range; + case OnlyInS2Range: goto entryOnlyInS2Range; + case S1SticksOut: goto entryS1SticksOut; + case S1SticksOutBreak: goto entryS1SticksOutBreak; + case S2SticksOut: goto entryS2SticksOut; + case S2SticksOutBreak: goto entryS2SticksOutBreak; + case S1DragsBehind: goto entryS1DragsBehind; + case S1DragsBehindBreak: goto entryS1DragsBehindBreak; + case S2DragsBehind: goto entryS2DragsBehind; + case S2DragsBehindBreak: goto entryS2DragsBehindBreak; + case ExactOverlap: goto entryExactOverlap; + case End: goto entryEnd; + } + +entryBegin: + /* Set up the next structs at the head of the transition lists. */ + s1Tel.set( list1 ); + s2Tel.set( list2 ); + + /* Concurrently scan both out ranges. */ + while ( true ) { + if ( s1Tel.trans == 0 ) { + /* We are at the end of state1's ranges. Process the rest of + * state2's ranges. */ + while ( s2Tel.trans != 0 ) { + /* Range is only in s2. */ + CO_RETURN2( ConsumeS2Range, RangeInS2 ); + s2Tel.increment(); + } + break; + } + else if ( s2Tel.trans == 0 ) { + /* We are at the end of state2's ranges. Process the rest of + * state1's ranges. */ + while ( s1Tel.trans != 0 ) { + /* Range is only in s1. */ + CO_RETURN2( ConsumeS1Range, RangeInS1 ); + s1Tel.increment(); + } + break; + } + /* Both state1's and state2's transition elements are good. + * The signature of no overlap is the high key of one range being + * below the low key of the other. */ + else if ( s1Tel.highKey < s2Tel.lowKey ) { + /* A range exists in state1 that does not overlap with state2. */ + CO_RETURN2( OnlyInS1Range, RangeInS1 ); + s1Tel.increment(); + } + else if ( s2Tel.highKey < s1Tel.lowKey ) { + /* A range exists in state2 that does not overlap with state1. */ + CO_RETURN2( OnlyInS2Range, RangeInS2 ); + s2Tel.increment(); + } + /* There is overlap, must mix the ranges in some way. 
*/ + else if ( s1Tel.lowKey < s2Tel.lowKey ) { + /* Range from state1 sticks out front. Must break it into + * non-overlapping and overlapping segments. The overlapping + * remainder is saved in the bottom variables. */ + bottomLow = s2Tel.lowKey; + bottomHigh = s1Tel.highKey; + s1Tel.highKey = s2Tel.lowKey; + s1Tel.highKey.decrement(); + bottomTrans1 = s1Tel.trans; + + /* Notify the caller that we are breaking s1. This gives them a + * chance to duplicate s1Tel[0,1].value. */ + CO_RETURN2( S1SticksOutBreak, BreakS1 ); + + /* Broken off range is only in s1. */ + CO_RETURN2( S1SticksOut, RangeInS1 ); + + /* Advance over the part sticking out front. */ + s1Tel.lowKey = bottomLow; + s1Tel.highKey = bottomHigh; + s1Tel.trans = bottomTrans1; + } + else if ( s2Tel.lowKey < s1Tel.lowKey ) { + /* Range from state2 sticks out front. Must break it into + * non-overlapping and overlapping segments. The overlapping + * remainder is saved in the bottom variables. */ + bottomLow = s1Tel.lowKey; + bottomHigh = s2Tel.highKey; + s2Tel.highKey = s1Tel.lowKey; + s2Tel.highKey.decrement(); + bottomTrans2 = s2Tel.trans; + + /* Notify the caller that we are breaking s2. This gives them a + * chance to duplicate s2Tel[0,1].value. */ + CO_RETURN2( S2SticksOutBreak, BreakS2 ); + + /* Broken off range is only in s2. */ + CO_RETURN2( S2SticksOut, RangeInS2 ); + + /* Advance over the part sticking out front. */ + s2Tel.lowKey = bottomLow; + s2Tel.highKey = bottomHigh; + s2Tel.trans = bottomTrans2; + } + /* Low ends are even. Are the high ends even? */ + else if ( s1Tel.highKey < s2Tel.highKey ) { + /* Range from state2 goes longer than the range from state1. We + * must break the range from state2 into an evenly overlapping + * segment. */ + bottomLow = s1Tel.highKey; + bottomLow.increment(); + bottomHigh = s2Tel.highKey; + s2Tel.highKey = s1Tel.highKey; + bottomTrans2 = s2Tel.trans; + + /* Notify the caller that we are breaking s2. This gives them a + * chance to duplicate s2Tel[0,1].value. */ + CO_RETURN2( S2DragsBehindBreak, BreakS2 ); + + /* Breaking s2 produces exact overlap. 
*/ + CO_RETURN2( S2DragsBehind, RangeOverlap ); + + /* Advance over the front we just broke off of range 2. */ + s2Tel.lowKey = bottomLow; + s2Tel.highKey = bottomHigh; + s2Tel.trans = bottomTrans2; + + /* Advance over the entire s1Tel. We have consumed it. */ + s1Tel.increment(); + } + else if ( s2Tel.highKey < s1Tel.highKey ) { + /* Range from state1 goes longer than the range from state2. We + * must break the range from state1 into an evenly overlapping + * segment. */ + bottomLow = s2Tel.highKey; + bottomLow.increment(); + bottomHigh = s1Tel.highKey; + s1Tel.highKey = s2Tel.highKey; + bottomTrans1 = s1Tel.trans; + + /* Notify the caller that we are breaking s1. This gives them a + * chance to duplicate s1Tel[0,1].value. */ + CO_RETURN2( S1DragsBehindBreak, BreakS1 ); + + /* Breaking s1 produces exact overlap. */ + CO_RETURN2( S1DragsBehind, RangeOverlap ); + + /* Advance over the front we just broke off of range 1. */ + s1Tel.lowKey = bottomLow; + s1Tel.highKey = bottomHigh; + s1Tel.trans = bottomTrans1; + + /* Advance over the entire s2Tel. We have consumed it. */ + s2Tel.increment(); + } + else { + /* There is an exact overlap. Report it, then advance both + * cursors. */ + CO_RETURN2( ExactOverlap, RangeOverlap ); + + s1Tel.increment(); + s2Tel.increment(); + } + } + + /* Done, go into end state. */ + CO_RETURN( End ); +} + + +/* Compare lists of epsilon transitions. Entries are name ids of targets. */ +typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans; + +/* Compare class for the approximate minimization. */ +class ApproxCompare +{ +public: + ApproxCompare() { } + int compare( const StateAp *pState1, const StateAp *pState2 ); +}; + +/* Compare class for the initial partitioning of a partition minimization. */ +class InitPartitionCompare +{ +public: + InitPartitionCompare() { } + int compare( const StateAp *pState1, const StateAp *pState2 ); +}; + +/* Compare class for the regular partitioning of a partition minimization. 
*/ +class PartitionCompare +{ +public: + PartitionCompare() { } + int compare( const StateAp *pState1, const StateAp *pState2 ); +}; + +/* Compare class for a minimization that marks pairs. Provides the shouldMark + * routine. */ +class MarkCompare +{ +public: + MarkCompare() { } + bool shouldMark( MarkIndex &markIndex, const StateAp *pState1, + const StateAp *pState2 ); +}; + +/* List of partitions. */ +typedef DList< MinPartition > PartitionList; + +/* List of transitions out of a state. */ +typedef Vector<TransEl> TransListVect; + +/* Entry point map used for keeping track of entry points in a machine. + * Maps an int entry id to a state. */ +typedef BstSet< int > EntryIdSet; +typedef BstMapEl< int, StateAp* > EntryMapEl; +typedef BstMap< int, StateAp* > EntryMap; +typedef Vector<EntryMapEl> EntryMapBase; + +/* Graph class that implements actions and priorities. */ +struct FsmAp +{ + /* Constructors/Destructors. */ + FsmAp( ); + FsmAp( const FsmAp &graph ); + ~FsmAp(); + + /* The list of states, and the separate list used for misfits. */ + StateList stateList; + StateList misfitList; + + /* The map of entry points. */ + EntryMap entryPoints; + + /* The start state. */ + StateAp *startState; + + /* Error state, possibly created only when the final machine has been + * created and the XML machine is about to be written. No transitions + * point to this state. */ + StateAp *errState; + + /* The set of final states. */ + StateSet finStateSet; + + /* Misfit accounting. Whether misfits are put on a separate list. */ + bool misfitAccounting; + + /* + * Transition actions and priorities. + */ + + /* Set priorities on transitions. */ + void startFsmPrior( int ordering, PriorDesc *prior ); + void allTransPrior( int ordering, PriorDesc *prior ); + void finishFsmPrior( int ordering, PriorDesc *prior ); + void leaveFsmPrior( int ordering, PriorDesc *prior ); + + /* Action setting support. 
*/ + void transferErrorActions( StateAp *state, int transferPoint ); + void setErrorAction( StateAp *state, int ordering, Action *action ); + + /* Fill all spaces in a transition list with an error transition. */ + void fillGaps( StateAp *state ); + + /* Similar to setErrorAction, but instead gives a state to go to on + * error. orderings and actions are parallel arrays of length nActs. */ + void setErrorTarget( StateAp *state, StateAp *target, int *orderings, + Action **actions, int nActs ); + + /* Set actions to execute. */ + void startFsmAction( int ordering, Action *action ); + void allTransAction( int ordering, Action *action ); + void finishFsmAction( int ordering, Action *action ); + void leaveFsmAction( int ordering, Action *action ); + void longMatchAction( int ordering, LongestMatchPart *lmPart ); + + /* Set conditions. */ + CondSpace *addCondSpace( const CondSet &condSet ); + + void findEmbedExpansions( ExpansionList &expansionList, + StateAp *destState, Action *condAction ); + /* Embed a condition action into a state. The MergeData overload is the + * worker; the two-argument overload supplies its own MergeData and + * calls the worker. */ void embedCondition( MergeData &md, StateAp *state, Action *condAction ); + void embedCondition( StateAp *state, Action *condAction ); + + void startFsmCondition( Action *condAction ); + void allTransCondition( Action *condAction ); + void leaveFsmCondition( Action *condAction ); + + /* Set error actions to execute. Each takes the ordering, the action and + * the transfer point of the embedding. */ + void startErrorAction( int ordering, Action *action, int transferPoint ); + void allErrorAction( int ordering, Action *action, int transferPoint ); + void finalErrorAction( int ordering, Action *action, int transferPoint ); + void notStartErrorAction( int ordering, Action *action, int transferPoint ); + void notFinalErrorAction( int ordering, Action *action, int transferPoint ); + void middleErrorAction( int ordering, Action *action, int transferPoint ); + + /* Set EOF actions. 
*/ + void startEOFAction( int ordering, Action *action ); + void allEOFAction( int ordering, Action *action ); + void finalEOFAction( int ordering, Action *action ); + void notStartEOFAction( int ordering, Action *action ); + void notFinalEOFAction( int ordering, Action *action ); + void middleEOFAction( int ordering, Action *action ); + + /* Set To State actions. */ + void startToStateAction( int ordering, Action *action ); + void allToStateAction( int ordering, Action *action ); + void finalToStateAction( int ordering, Action *action ); + void notStartToStateAction( int ordering, Action *action ); + void notFinalToStateAction( int ordering, Action *action ); + void middleToStateAction( int ordering, Action *action ); + + /* Set From State actions. */ + void startFromStateAction( int ordering, Action *action ); + void allFromStateAction( int ordering, Action *action ); + void finalFromStateAction( int ordering, Action *action ); + void notStartFromStateAction( int ordering, Action *action ); + void notFinalFromStateAction( int ordering, Action *action ); + void middleFromStateAction( int ordering, Action *action ); + + /* Shift the action ordering of the start transitions to start at + * fromOrder and increase in units of 1. Useful before kleene star + * operation. */ + int shiftStartActionOrder( int fromOrder ); + + /* Clear all priorities from the fsm so that they do not affect + * minimization of the final fsm. */ + void clearAllPriorities(); + + /* Zero out all the function keys. */ + void nullActionKeys(); + + /* Walk the list of states and verify state properties. */ + void verifyStates(); + + /* Misfit accounting. Sets whether misfits are put on a separate list. */ + void setMisfitAccounting( bool val ) + { misfitAccounting = val; } + + /* Set and unset a state as final. */ + void setFinState( StateAp *state ); + void unsetFinState( StateAp *state ); + /* Set and unset the start state. */ + void setStartState( StateAp *state ); + void unsetStartState( ); + + /* Set and unset a state as an entry point. 
*/ + void setEntry( int id, StateAp *state ); + void changeEntry( int id, StateAp *to, StateAp *from ); + void unsetEntry( int id, StateAp *state ); + void unsetEntry( int id ); + void unsetAllEntryPoints(); + + /* Epsilon transitions. */ + void epsilonTrans( int id ); + void shadowReadWriteStates( MergeData &md ); + + /* + * Basic attaching and detaching. + */ + + /* Common to attaching/detaching list and default. */ + void attachToInList( StateAp *from, StateAp *to, TransAp *&head, TransAp *trans ); + void detachFromInList( StateAp *from, StateAp *to, TransAp *&head, TransAp *trans ); + + /* Attach with a new transition. */ + TransAp *attachNewTrans( StateAp *from, StateAp *to, + Key onChar1, Key onChar2 ); + + /* Attach with an existing transition that is already in an out list. */ + void attachTrans( StateAp *from, StateAp *to, TransAp *trans ); + + /* Redirect a transition away from error and towards some state. */ + void redirectErrorTrans( StateAp *from, StateAp *to, TransAp *trans ); + + /* Detach a transition from a target state. */ + void detachTrans( StateAp *from, StateAp *to, TransAp *trans ); + + /* Detach a state from the graph. */ + void detachState( StateAp *state ); + + /* + * NFA to DFA conversion routines. + */ + + /* Duplicate a transition that will drop in to a free spot. */ + TransAp *dupTrans( StateAp *from, TransAp *srcTrans ); + + /* In crossing, two transitions both go to real states. */ + TransAp *fsmAttachStates( MergeData &md, StateAp *from, + TransAp *destTrans, TransAp *srcTrans ); + + /* Two transitions are to be crossed, handle the possibility of either + * going to the error state. */ + TransAp *mergeTrans( MergeData &md, StateAp *from, + TransAp *destTrans, TransAp *srcTrans ); + + /* Compare to determine the relative priorities of two transition + * priority tables. */ + int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ); + + /* Cross a src transition with one that is already occupying a spot. 
*/ + TransAp *crossTransitions( MergeData &md, StateAp *from, + TransAp *destTrans, TransAp *srcTrans ); + + void outTransCopy( MergeData &md, StateAp *dest, TransAp *srcList ); + + void doRemove( MergeData &md, StateAp *destState, ExpansionList &expList1 ); + void doExpand( MergeData &md, StateAp *destState, ExpansionList &expList1 ); + void findCondExpInTrans( ExpansionList &expansionList, StateAp *state, + Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace, + long destVals, LongVect &toValsList ); + void findTransExpansions( ExpansionList &expansionList, + StateAp *destState, StateAp *srcState ); + void findCondExpansions( ExpansionList &expansionList, + StateAp *destState, StateAp *srcState ); + void mergeStateConds( StateAp *destState, StateAp *srcState ); + + /* Merge a set of states into destState. */ + void mergeStates( MergeData &md, StateAp *destState, + StateAp **srcStates, int numSrc ); + void mergeStatesLeaving( MergeData &md, StateAp *destState, StateAp *srcState ); + void mergeStates( MergeData &md, StateAp *destState, StateAp *srcState ); + + /* Make all states that are combinations of other states and that + * have not yet had their out transitions filled in. This will + * empty out stateDict and stFil. */ + void fillInStates( MergeData &md ); + + /* + * Transition Comparison. + */ + + /* Compare transition data. Either of the pointers may be null. */ + static inline int compareDataPtr( TransAp *trans1, TransAp *trans2 ); + + /* Compare target state and transition data. Either pointer may be null. */ + static inline int compareFullPtr( TransAp *trans1, TransAp *trans2 ); + + /* Compare target partitions. Either pointer may be null. */ + static inline int comparePartPtr( TransAp *trans1, TransAp *trans2 ); + + /* Check marked status of target states. Either pointer may be null. */ + static inline bool shouldMarkPtr( MarkIndex &markIndex, + TransAp *trans1, TransAp *trans2 ); + + /* + * Callbacks. 
+ */ + + /* Compare priority and function table of transitions. */ + static int compareTransData( TransAp *trans1, TransAp *trans2 ); + + /* Add in the properties of srcTrans into destTrans. */ + void addInTrans( TransAp *destTrans, TransAp *srcTrans ); + + /* Compare states on data stored in the states. */ + static int compareStateData( const StateAp *state1, const StateAp *state2 ); + + /* Out transition data. */ + void clearOutData( StateAp *state ); + bool hasOutData( StateAp *state ); + void transferOutData( StateAp *destState, StateAp *srcState ); + + /* + * Allocation. + */ + + /* New up a state and add it to the graph. */ + StateAp *addState(); + + /* + * Building basic machines + */ + + void concatFsm( Key c ); + void concatFsm( Key *str, int len ); + void concatFsmCI( Key *str, int len ); + void orFsm( Key *set, int len ); + void rangeFsm( Key low, Key high ); + void rangeStarFsm( Key low, Key high ); + void emptyFsm( ); + void lambdaFsm( ); + + /* + * Fsm operators. + */ + + void starOp( ); + void repeatOp( int times ); + void optionalRepeatOp( int times ); + void concatOp( FsmAp *other ); + void unionOp( FsmAp *other ); + void intersectOp( FsmAp *other ); + void subtractOp( FsmAp *other ); + void epsilonOp(); + void joinOp( int startId, int finalId, FsmAp **others, int numOthers ); + void globOp( FsmAp **others, int numOthers ); + void deterministicEntry(); + + /* + * Operator workers + */ + + /* Determine if there are any entry points into a start state other than + * the start state. */ + bool isStartStateIsolated(); + + /* Make a new start state that has no entry points. Will not change the + * identity of the fsm. */ + void isolateStartState(); + + /* Workers for resolving epsilon transitions. */ + bool inEptVect( EptVect *eptVect, StateAp *targ ); + void epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving ); + void resolveEpsilonTrans( MergeData &md ); + + /* Workers for concatenation and union. 
*/ + void doConcat( FsmAp *other, StateSet *fromStates, bool optional ); + void doOr( FsmAp *other ); + + /* + * Final states + */ + + /* Unset any final states that are no longer to be final + * due to final bits. */ + void unsetIncompleteFinals(); + void unsetKilledFinals(); + + /* Bring in other's entry points. Assumes other's states are going to be + * copied into this machine. */ + void copyInEntryPoints( FsmAp *other ); + + /* Ordering states. */ + void depthFirstOrdering( StateAp *state ); + void depthFirstOrdering(); + void sortStatesByFinal(); + + /* Set sequential state numbers starting at 0. */ + void setStateNumbers( int base ); + + /* Unset all final states. */ + void unsetAllFinStates(); + + /* Set the bits of final states and clear the bits of non final states. */ + void setFinBits( int finStateBits ); + + /* + * Self-consistency checks. + */ + + /* Run a sanity check on the machine. */ + void verifyIntegrity(); + + /* Verify that there are no unreachable states, or dead end states. */ + void verifyReachability(); + void verifyNoDeadEndStates(); + + /* + * Path pruning + */ + + /* Mark all states that have a path to state (reverse walk; used by + * removeDeadEndStates starting from the final states). */ + void markReachableFromHereReverse( StateAp *state ); + + /* Mark all states reachable from state. */ + void markReachableFromHere( StateAp *state ); + void markReachableFromHereStopFinal( StateAp *state ); + + /* Removes states that do not lead to a final state and are + * thus wasted silicon. */ + void removeDeadEndStates(); + + /* Removes states that cannot be reached by any path in the fsm and are + * thus wasted silicon. */ + void removeUnreachableStates(); + + /* Remove error actions from states on which the error transition will + * never be taken. */ + bool outListCovers( StateAp *state ); + bool anyErrorRange( StateAp *state ); + + /* Remove states that are on the misfit list. */ + void removeMisfits(); + + /* + * FSM Minimization + */ + + /* Minimization by partitioning. 
*/ + void minimizePartition1(); + void minimizePartition2(); + + /* Minimize the final state Machine. The result is the minimal fsm. Slow + * but stable, correct minimization. Uses n^2 space (lookout) and average + * n^2 time. Worst case n^3 time, but that is a very rare case. */ + void minimizeStable(); + + /* Minimize the final state machine. Does not find the minimal fsm, but a + * pretty good approximation. Does not use any extra space. Average n^2 + * time. Worst case n^3 time, but that is a very rare case. */ + void minimizeApproximate(); + + /* This is the worker for the minimize approximate solution. It merges + * states that have identical out transitions. */ + bool minimizeRound( ); + + /* Given an initial partitioning of states, split partitions that have out trans + * to differing partitions. */ + int partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts ); + + /* Split partitions that have a transition to a previously split partition, until + * there are no more partitions to split. */ + int splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts ); + + /* Fuse together states in the same partition. */ + void fusePartitions( MinPartition *parts, int numParts ); + + /* Mark pairs where out final stateness differs, out trans data differs, + * trans pairs go to a marked pair or trans data differs. Should get + * a lot of pairs. */ + void initialMarkRound( MarkIndex &markIndex ); + + /* One marking round on all state pairs. Considers if trans pairs go + * to a marked state only. Returns whether or not a pair was marked. */ + bool markRound( MarkIndex &markIndex ); + + /* Move the in trans into src into dest. */ + void inTransMove(StateAp *dest, StateAp *src); + + /* Make state src and dest the same state. */ + void fuseEquivStates(StateAp *dest, StateAp *src); + + /* Find any states that didn't get marked by the marking algorithm and + * merge them into the primary states of their equivalence class. 
*/ + void fuseUnmarkedPairs( MarkIndex &markIndex ); + + /* Merge neighboring transitions that go to the same state and have the same + * transitions data. */ + void compressTransitions(); + + /* Returns true if there is a transition (either explicit or by a gap) to + * the error state. */ + bool checkErrTrans( StateAp *state, TransAp *trans ); + bool checkErrTransFinish( StateAp *state ); + bool hasErrorTrans(); + + /* Check if a machine defines a single character. This is useful in + * validating ranges and machines to export. */ + bool checkSingleCharMachine( ); +}; + + +#endif /* _FSMGRAPH_H */ diff --git a/contrib/tools/ragel5/ragel/fsmmin.cpp b/contrib/tools/ragel5/ragel/fsmmin.cpp new file mode 100644 index 0000000000..046d11afa6 --- /dev/null +++ b/contrib/tools/ragel5/ragel/fsmmin.cpp @@ -0,0 +1,732 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "fsmgraph.h" +#include "mergesort.h" + +int FsmAp::partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts ) +{ + /* Need a mergesort object and a single partition compare. */ + MergeSort<StateAp*, PartitionCompare> mergeSort; + PartitionCompare partCompare; + + /* For each partition. 
*/ + for ( int p = 0; p < numParts; p++ ) { + /* Fill the pointer array with the states in the partition. */ + StateList::Iter state = parts[p].list; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the partitioning compare. */ + int numStates = parts[p].list.length(); + mergeSort.sort( statePtrs, numStates ); + + /* Assign the states into partitions based on the results of the sort. */ + int destPart = p, firstNewPart = numParts; + for ( int s = 1; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* The new partition is the next available spot. */ + destPart = numParts; + numParts += 1; + } + + /* If the state is not staying in the first partition, then + * transfer it to its destination partition. */ + if ( destPart != p ) { + StateAp *state = parts[p].list.detach( statePtrs[s] ); + parts[destPart].list.append( state ); + } + } + + /* Fix the partition pointer for all the states that got moved to a new + * partition. This must be done after the states are transferred so the + * result of the sort is not altered. */ + for ( int newPart = firstNewPart; newPart < numParts; newPart++ ) { + StateList::Iter state = parts[newPart].list; + for ( ; state.lte(); state++ ) + state->alg.partition = &parts[newPart]; + } + } + + return numParts; +} + +/** + * \brief Minimize by partitioning version 1. + * + * Repeatedly tries to split partitions until all partitions are unsplittable. + * Produces the most minimal FSM possible. + */ +void FsmAp::minimizePartition1() +{ + /* Need one mergesort object and partition compares. */ + MergeSort<StateAp*, InitPartitionCompare> mergeSort; + InitPartitionCompare initPartCompare; + + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return; + + /* + * First thing is to partition the states by final state status and + * transition functions. 
This gives us an initial partitioning to work + * with. + */ + + /* Make an array of pointers to states. */ + int numStates = stateList.length(); + StateAp** statePtrs = new StateAp*[numStates]; + + /* Fill up an array of pointers to the states for easy sorting. */ + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the array of states. */ + mergeSort.sort( statePtrs, numStates ); + + /* An array of lists of states is used to partition the states. */ + MinPartition *parts = new MinPartition[numStates]; + + /* Assign the states into partitions. */ + int destPart = 0; + for ( int s = 0; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* Move to the next partition. */ + destPart += 1; + } + + /* Put the state into its partition. */ + statePtrs[s]->alg.partition = &parts[destPart]; + parts[destPart].list.append( statePtrs[s] ); + } + + /* We just moved all the states from the main list into partitions without + * taking them off the main list. So clean up the main list now. */ + stateList.abandon(); + + /* Split partitions. */ + int numParts = destPart + 1; + while ( true ) { + /* Test all partitions for splitting. */ + int newNum = partitionRound( statePtrs, parts, numParts ); + + /* When no partitions can be split, stop. */ + if ( newNum == numParts ) + break; + + numParts = newNum; + } + + /* Fuse states in the same partition. The states will end up back on the + * main list. */ + fusePartitions( parts, numParts ); + + /* Cleanup. */ + delete[] statePtrs; + delete[] parts; +} + +/* Split partitions that need splittting, decide which partitions might need + * to be split as a result, continue until there are no more that might need + * to be split. 
*/ +int FsmAp::splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts ) +{ + /* Need a mergesort and a partition compare. */ + MergeSort<StateAp*, PartitionCompare> mergeSort; + PartitionCompare partCompare; + + /* The lists of unsplittable (partList) and splittable partitions. + * Only partitions in the splittable list are checked for needing splitting. */ + PartitionList partList, splittable; + + /* Initially, all partitions are born from a split (the initial + * partitioning) and can cause other partitions to be split. So any + * partition with a state with a transition out to another partition is a + * candidate for splitting. This will make every partition except possibly + * partitions of final states split candidates. */ + for ( int p = 0; p < numParts; p++ ) { + /* Assume not active. */ + parts[p].active = false; + + /* Look for a trans out of any state in the partition. */ + for ( StateList::Iter state = parts[p].list; state.lte(); state++ ) { + /* If there is at least one transition out to another state then + * the partition becomes splittable. */ + if ( state->outList.length() > 0 ) { + parts[p].active = true; + break; + } + } + + /* If it was found active then it goes on the splittable list. */ + if ( parts[p].active ) + splittable.append( &parts[p] ); + else + partList.append( &parts[p] ); + } + + /* While there are partitions that are splittable, pull one off and try + * to split it. If it splits, determine which partitions may now be split + * as a result of the newly split partition. */ + while ( splittable.length() > 0 ) { + MinPartition *partition = splittable.detachFirst(); + + /* Fill the pointer array with the states in the partition. */ + StateList::Iter state = partition->list; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the partitioning compare. 
*/ + int numStates = partition->list.length(); + mergeSort.sort( statePtrs, numStates ); + + /* Assign the states into partitions based on the results of the sort. */ + MinPartition *destPart = partition; + int firstNewPart = numParts; + for ( int s = 1; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* The new partition is the next available spot. */ + destPart = &parts[numParts]; + numParts += 1; + } + + /* If the state is not staying in the first partition, then + * transfer it to its destination partition. */ + if ( destPart != partition ) { + StateAp *state = partition->list.detach( statePtrs[s] ); + destPart->list.append( state ); + } + } + + /* Fix the partition pointer for all the states that got moved to a new + * partition. This must be done after the states are transferred so the + * result of the sort is not altered. */ + int newPart; + for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { + StateList::Iter state = parts[newPart].list; + for ( ; state.lte(); state++ ) + state->alg.partition = &parts[newPart]; + } + + /* Put the partition we just split and any new partitions that came out + * of the split onto the inactive list. */ + partition->active = false; + partList.append( partition ); + for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { + parts[newPart].active = false; + partList.append( &parts[newPart] ); + } + + /* Nothing was split off, so no other partition can be affected. */ + if ( destPart == partition ) + continue; + + /* Now determine which partitions are splittable as a result of + * splitting partition by walking the in lists of the states in + * partitions that got split. Partition is the faked first item in the + * loop. */ + MinPartition *causalPart = partition; + newPart = firstNewPart - 1; + while ( newPart < numParts ) { + /* Loop all states in the causal partition. 
*/ + StateList::Iter state = causalPart->list; + for ( ; state.lte(); state++ ) { + /* Walk all transitions into the state and put the partition + * that the from state is in onto the splittable list. */ + for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) { + MinPartition *fromPart = trans->fromState->alg.partition; + if ( ! fromPart->active ) { + fromPart->active = true; + partList.detach( fromPart ); + splittable.append( fromPart ); + } + } + } + + newPart += 1; + causalPart = &parts[newPart]; + } + } + return numParts; +} + + +/** + * \brief Minimize by partitioning version 2 (best alg). + * + * Repeatedly tries to split partitions that may be splittable until there are no + * more partitions that might possibly need splitting. Runs faster than + * version 1. Produces the most minimal fsm possible. + */ +void FsmAp::minimizePartition2() +{ + /* Need a mergesort and an initial partition compare. */ + MergeSort<StateAp*, InitPartitionCompare> mergeSort; + InitPartitionCompare initPartCompare; + + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return; + + /* + * First thing is to partition the states by final state status and + * transition functions. This gives us an initial partitioning to work + * with. + */ + + /* Make an array of pointers to states. */ + int numStates = stateList.length(); + StateAp** statePtrs = new StateAp*[numStates]; + + /* Fill up an array of pointers to the states for easy sorting. */ + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the array of states. */ + mergeSort.sort( statePtrs, numStates ); + + /* An array of lists of states is used to partition the states. */ + MinPartition *parts = new MinPartition[numStates]; + + /* Assign the states into partitions. */ + int destPart = 0; + for ( int s = 0; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. 
*/ + if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* Move to the next partition. */ + destPart += 1; + } + + /* Put the state into its partition. */ + statePtrs[s]->alg.partition = &parts[destPart]; + parts[destPart].list.append( statePtrs[s] ); + } + + /* We just moved all the states from the main list into partitions without + * taking them off the main list. So clean up the main list now. */ + stateList.abandon(); + + /* Split partitions. */ + int numParts = splitCandidates( statePtrs, parts, destPart+1 ); + + /* Fuse states in the same partition. The states will end up back on the + * main list. */ + fusePartitions( parts, numParts ); + + /* Cleanup. */ + delete[] statePtrs; + delete[] parts; +} + +void FsmAp::initialMarkRound( MarkIndex &markIndex ) +{ + /* P and q for walking pairs. */ + StateAp *p = stateList.head, *q; + + /* Need an initial partition compare. */ + InitPartitionCompare initPartCompare; + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + q = stateList.head; + while ( q != p ) { + /* If the states differ on final state status, out transitions or + * any transition data then they should be separated on the initial + * round. */ + if ( initPartCompare.compare( p, q ) != 0 ) + markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); + + q = q->next; + } + p = p->next; + } +} + +bool FsmAp::markRound( MarkIndex &markIndex ) +{ + /* P and q for walking pairs. Take note if any pair gets marked. */ + StateAp *p = stateList.head, *q; + bool pairWasMarked = false; + + /* Need a mark comparison. */ + MarkCompare markCompare; + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. 
*/ + while ( p != 0 ) { + q = stateList.head; + while ( q != p ) { + /* Should we mark the pair? Only pairs not already marked are tested. */ + if ( !markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { + if ( markCompare.shouldMark( markIndex, p, q ) ) { + markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); + pairWasMarked = true; + } + } + q = q->next; + } + p = p->next; + } + + return pairWasMarked; +} + + +/** + * \brief Minimize by pair marking. + * + * Decides if each pair of states is distinct or not. Uses O(n^2) memory and + * should only be used on small graphs. Produces the most minimal FSM + * possible. + */ +void FsmAp::minimizeStable() +{ + /* Set the state numbers. */ + setStateNumbers( 0 ); + + /* This keeps track of which pairs have been marked. */ + MarkIndex markIndex( stateList.length() ); + + /* Mark pairs where final stateness, out trans, or trans data differ. */ + initialMarkRound( markIndex ); + + /* While the last round of marking succeeded in marking a pair + * continue to do another round. */ + int modified = markRound( markIndex ); + while (modified) + modified = markRound( markIndex ); + + /* Merge pairs that are unmarked. */ + fuseUnmarkedPairs( markIndex ); +} + +bool FsmAp::minimizeRound() +{ + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return false; + + /* Need a mergesort on approx compare and an approx compare. */ + MergeSort<StateAp*, ApproxCompare> mergeSort; + ApproxCompare approxCompare; + + /* Fill up an array of pointers to the states. */ + StateAp **statePtrs = new StateAp*[stateList.length()]; + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + bool modified = false; + + /* Sort the list. */ + mergeSort.sort( statePtrs, stateList.length() ); + + /* Walk the list looking for duplicates next to each other, + * merge in any duplicates. 
*/ + StateAp **pLast = statePtrs; + StateAp **pState = statePtrs + 1; + for ( int i = 1; i < stateList.length(); i++, pState++ ) { + if ( approxCompare.compare( *pLast, *pState ) == 0 ) { + /* Last and pState are the same, so fuse together. Move forward + * with pState but not with pLast. If any more are identical, they + * will be fused into pLast as well. */ + fuseEquivStates( *pLast, *pState ); + modified = true; + } + else { + /* Last and this are different, do not set to merge them. Move + * pLast to the current (it may be way behind from merging many + * states) and pState forward one to consider the next pair. */ + pLast = pState; + } + } + delete[] statePtrs; + return modified; +} + +/** + * \brief Minimize by an approximation. + * + * Repeatedly tries to find states with transitions out to the same set of + * states on the same set of keys until no more identical states can be found. + * Does not produce the most minimal FSM possible. + */ +void FsmAp::minimizeApproximate() +{ + /* While the last minimization round succeeded in compacting states, + * continue to try to compact states. */ + while ( true ) { + bool modified = minimizeRound(); + if ( ! modified ) + break; + } +} + + +/* Remove states that have no path to them from the start state. Recursively + * traverses the graph marking states that have paths into them. Then removes + * all states that did not get marked. */ +void FsmAp::removeUnreachableStates() +{ + /* Misfit accounting should be off and there should be no states on the + * misfit list. */ + assert( !misfitAccounting && misfitList.length() == 0 ); + + /* Mark all the states that can be reached + * through the existing set of entry points. */ + markReachableFromHere( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + markReachableFromHere( en->value ); + + /* Delete all states that are not marked + * and unmark the ones that are marked. 
*/ + StateAp *state = stateList.head; + while ( state ) { + StateAp *next = state->next; + + if ( state->stateBits & SB_ISMARKED ) + state->stateBits &= ~ SB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } +} + +bool FsmAp::outListCovers( StateAp *state ) +{ + /* Must be at least one range to cover. */ + if ( state->outList.length() == 0 ) + return false; + + /* The first must start at the lower bound. */ + TransList::Iter trans = state->outList.first(); + if ( keyOps->minKey < trans->lowKey ) + return false; + + /* Loop starts at second el. */ + trans.increment(); + + /* Loop checks lower against prev upper. */ + for ( ; trans.lte(); trans++ ) { + /* Lower end of the trans must be one greater than the + * previous' high end. */ + Key lowKey = trans->lowKey; + lowKey.decrement(); + if ( trans->prev->highKey < lowKey ) + return false; + } + + /* Require that the last range extends to the upper bound. */ + trans = state->outList.last(); + if ( trans->highKey < keyOps->maxKey ) + return false; + + return true; +} + +/* Remove states that do not lead to a final state. Works by recursively traversing + * the graph in reverse (starting from all final states) and marking seen states. Then + * removes states that did not get marked. */ +void FsmAp::removeDeadEndStates() +{ + /* Misfit accounting should be off and there should be no states on the + * misfit list. */ + assert( !misfitAccounting && misfitList.length() == 0 ); + + /* Mark all states that have paths to the final states. */ + StateAp **st = finStateSet.data; + int nst = finStateSet.length(); + for ( int i = 0; i < nst; i++, st++ ) + markReachableFromHereReverse( *st ); + + /* Start state gets honorary marking. If the machine accepts nothing we + * still want the start state to hang around. 
This must be done after the + * recursive call on all the final states so that it does not cause the + * start state in transitions to be skipped when the start state is + * visited by the traversal. */ + startState->stateBits |= SB_ISMARKED; + + /* Delete all states that are not marked + * and unmark the ones that are marked. */ + StateAp *state = stateList.head; + while ( state != 0 ) { + StateAp *next = state->next; + + if ( state->stateBits & SB_ISMARKED ) + state->stateBits &= ~ SB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } +} + +/* Remove states on the misfit list. To work properly misfit accounting should + * be on when this is called. The detaching of a state will likely cause + * another misfit to be collected and it can then be removed. */ +void FsmAp::removeMisfits() +{ + while ( misfitList.length() > 0 ) { + /* Get the first state. */ + StateAp *state = misfitList.head; + + /* Detach and delete. */ + detachState( state ); + + /* The state was previously on the misfit list and detaching can only + * remove in transitions so the state must still be on the misfit + * list. */ + misfitList.detach( state ); + delete state; + } +} + +/* Fuse src into dest because they have been deemed equivalent states. + * Involves moving transitions into src to go into dest and invoking + * callbacks. Src is detached from the graph and deleted. */ +void FsmAp::fuseEquivStates( StateAp *dest, StateAp *src ) +{ + /* Fusing a state with itself would get ugly. */ + assert( dest != src ); + + /* Src is a duplicate of dest. Move the transitions into src over to dest. */ + inTransMove( dest, src ); + + detachState( src ); + stateList.detach( src ); + delete src; +} + +void FsmAp::fuseUnmarkedPairs( MarkIndex &markIndex ) +{ + StateAp *p = stateList.head, *nextP, *q; + + /* Definition: The primary state of an equivalence class is the first state + * encounterd that belongs to the equivalence class. 
All equivalence + * classes have primary state including equivalence classes with one state + * in it. */ + + /* For each unmarked pair merge p into q and delete p. q is always the + * primary state of its equivalence class. We wouldn't have landed on it + * here if it were not, because it would have been deleted. + * + * Proof that q is the primary state of its equivalence class: Assume q + * is not the primary state of its equivalence class, then it would be + * merged into some state that came before it and thus p would be + * equivalent to that state. But q is the first state that p is equivalent + * to so we have a contradiction. */ + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + /* Save the next now, p is deleted if it gets fused into q. */ + nextP = p->next; + + q = stateList.head; + while ( q != p ) { + /* If the pair was never marked then p and q are equivalent. + * Fuse p into q and stop scanning, p no longer exists. */ + if ( ! markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { + fuseEquivStates( q, p ); + break; + } + q = q->next; + } + p = nextP; + } +} + +void FsmAp::fusePartitions( MinPartition *parts, int numParts ) +{ + /* For each partition, fuse state 2, 3, ... into state 1. */ + for ( int p = 0; p < numParts; p++ ) { + /* Assume that there will always be at least one state. */ + StateAp *first = parts[p].list.head, *toFuse = first->next; + + /* Put the first state back onto the main state list. Don't bother + * removing it from the partition list first. */ + stateList.append( first ); + + /* Fuse the rest of the state into the first. */ + while ( toFuse != 0 ) { + /* Save the next. We will trash it before it is needed. */ + StateAp *next = toFuse->next; + + /* Put the state to be fused in to the first back onto the main + * list before it is fuse. the graph. The state needs to be on + * the main list for the detach from the graph to work. Don't + * bother removing the state from the partition list first. 
We + * need not maintain it. */ + stateList.append( toFuse ); + + /* Now fuse to the first. */ + fuseEquivStates( first, toFuse ); + + /* Go to the next that we saved before trashing the next pointer. */ + toFuse = next; + } + + /* We transferred the states from the partition list into the main list without + * removing the states from the partition list first. Clean it up. */ + parts[p].list.abandon(); + } +} + + +/* Merge neighboring transitions that go to the same state and have the same + * transitions data. */ +void FsmAp::compressTransitions() +{ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->outList.length() > 1 ) { + for ( TransList::Iter trans = st->outList, next = trans.next(); next.lte(); ) { + /* Neighbors are mergeable only when next starts right after trans ends. */ + Key nextLow = next->lowKey; + nextLow.decrement(); + if ( trans->highKey == nextLow && trans->toState == next->toState && + CmpActionTable::compare( trans->actionTable, next->actionTable ) == 0 ) + { + /* Extend trans over the range of next, then drop next. Trans stays + * put so it can absorb further neighbors. */ + trans->highKey = next->highKey; + st->outList.detach( next ); + detachTrans( next->fromState, next->toState, next ); + delete next; + next = trans.next(); + } + else { + trans.increment(); + next.increment(); + } + } + } + } +} diff --git a/contrib/tools/ragel5/ragel/fsmstate.cpp b/contrib/tools/ragel5/ragel/fsmstate.cpp new file mode 100644 index 0000000000..4322c1060f --- /dev/null +++ b/contrib/tools/ragel5/ragel/fsmstate.cpp @@ -0,0 +1,463 @@ +/* + * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <string.h> +#include <assert.h> +#include "fsmgraph.h" + +#include <iostream> +using namespace std; + +/* Construct a mark index for a specified number of states. Must new up + * an array that is states^2 in size. */ +MarkIndex::MarkIndex( int states ) : numStates(states) +{ + /* Total pairs is states^2. Actually only use half of these, but we allocate + * them all to make indexing into the array easier. */ + int total = states * states; + + /* New up chars so that individual DListEl constructors are + * not called. Zero out the mem manually. */ + array = new bool[total]; + memset( array, 0, sizeof(bool) * total ); +} + +/* Free the array used to store state pairs. */ +MarkIndex::~MarkIndex() +{ + delete[] array; +} + +/* Mark a pair of states. States are specified by their number. The + * marked states are moved from the unmarked list to the marked list. */ +void MarkIndex::markPair(int state1, int state2) +{ + int pos = ( state1 >= state2 ) ? + ( state1 * numStates ) + state2 : + ( state2 * numStates ) + state1; + + array[pos] = true; +} + +/* Returns true if the pair of states are marked. Returns false otherwise. + * Ordering of states given does not matter. */ +bool MarkIndex::isPairMarked(int state1, int state2) +{ + int pos = ( state1 >= state2 ) ? + ( state1 * numStates ) + state2 : + ( state2 * numStates ) + state1; + + return array[pos]; +} + +/* Create a new fsm state. State has not out transitions or in transitions, not + * out out transition data and not number. */ +StateAp::StateAp() +: + /* No out or in transitions. */ + outList(), + inList(), + + /* No entry points, or epsilon trans. */ + entryIds(), + epsilonTrans(), + + /* Conditions. */ + stateCondList(), + + /* No transitions in from other states. 
*/ + foreignInTrans(0), + + /* Only used during merging. Normally null. */ + stateDictEl(0), + eptVect(0), + + /* No state identification bits. */ + stateBits(0), + + /* No Priority data. */ + outPriorTable(), + + /* No Action data. */ + toStateActionTable(), + fromStateActionTable(), + outActionTable(), + outCondSet(), + errActionTable(), + eofActionTable() +{ +} + +/* Copy everything except actual the transitions. That is left up to the + * FsmAp copy constructor. */ +StateAp::StateAp(const StateAp &other) +: + /* All lists are cleared. They will be filled in when the + * individual transitions are duplicated and attached. */ + outList(), + inList(), + + /* Duplicate the entry id set and epsilon transitions. These + * are sets of integers and as such need no fixing. */ + entryIds(other.entryIds), + epsilonTrans(other.epsilonTrans), + + /* Copy in the elements of the conditions. */ + stateCondList( other.stateCondList ), + + /* No transitions in from other states. */ + foreignInTrans(0), + + /* This is only used during merging. Normally null. */ + stateDictEl(0), + eptVect(0), + + /* Fsm state data. */ + stateBits(other.stateBits), + + /* Copy in priority data. */ + outPriorTable(other.outPriorTable), + + /* Copy in action data. */ + toStateActionTable(other.toStateActionTable), + fromStateActionTable(other.fromStateActionTable), + outActionTable(other.outActionTable), + outCondSet(other.outCondSet), + errActionTable(other.errActionTable), + eofActionTable(other.eofActionTable) +{ + /* Duplicate all the transitions. */ + for ( TransList::Iter trans = other.outList; trans.lte(); trans++ ) { + /* Dupicate and store the orginal target in the transition. This will + * be corrected once all the states have been created. */ + TransAp *newTrans = new TransAp(*trans); + newTrans->toState = trans->toState; + outList.append( newTrans ); + } +} + +/* If there is a state dict element, then delete it. Everything else is left + * up to the FsmGraph destructor. 
*/ +StateAp::~StateAp() +{ + if ( stateDictEl != 0 ) + delete stateDictEl; +} + +/* Compare two states using pointers to the states. With the approximate + * compare the idea is that if the compare finds them the same, they can + * immediately be merged. */ +int ApproxCompare::compare( const StateAp *state1 , const StateAp *state2 ) +{ + int compareRes; + + /* Test final state status. */ + if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) ) + return -1; + else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) ) + return 1; + + /* Test epsilon transition sets. */ + compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, + state2->epsilonTrans ); + if ( compareRes != 0 ) + return compareRes; + + /* Compare the out transitions. */ + compareRes = FsmAp::compareStateData( state1, state2 ); + if ( compareRes != 0 ) + return compareRes; + + /* Use a pair iterator to get the transition pairs. */ + PairIter<TransAp> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + compareRes = FsmAp::compareFullPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeInS2: + compareRes = FsmAp::compareFullPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeOverlap: + compareRes = FsmAp::compareFullPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + /* Got through the entire state comparison, deem them equal. */ + return 0; +} + +/* Compare class for the sort that does the intial partition of compaction. */ +int InitPartitionCompare::compare( const StateAp *state1 , const StateAp *state2 ) +{ + int compareRes; + + /* Test final state status. 
*/ + if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) ) + return -1; + else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) ) + return 1; + + /* Test epsilon transition sets. */ + compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, + state2->epsilonTrans ); + if ( compareRes != 0 ) + return compareRes; + + /* Compare the out transitions. */ + compareRes = FsmAp::compareStateData( state1, state2 ); + if ( compareRes != 0 ) + return compareRes; + + /* Use a pair iterator to test the condition pairs. */ + PairIter<StateCond> condPair( state1->stateCondList.head, state2->stateCondList.head ); + for ( ; !condPair.end(); condPair++ ) { + switch ( condPair.userState ) { + case RangeInS1: + return 1; + case RangeInS2: + return -1; + + case RangeOverlap: { + CondSpace *condSpace1 = condPair.s1Tel.trans->condSpace; + CondSpace *condSpace2 = condPair.s2Tel.trans->condSpace; + if ( condSpace1 < condSpace2 ) + return -1; + else if ( condSpace1 > condSpace2 ) + return 1; + break; + } + case BreakS1: + case BreakS2: + break; + } + } + + /* Use a pair iterator to test the transition pairs. */ + PairIter<TransAp> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + compareRes = FsmAp::compareDataPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeInS2: + compareRes = FsmAp::compareDataPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeOverlap: + compareRes = FsmAp::compareDataPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + return 0; +} + +/* Compare class for the sort that does the partitioning. 
*/ +int PartitionCompare::compare( const StateAp *state1, const StateAp *state2 ) +{ + int compareRes; + + /* Use a pair iterator to get the transition pairs. */ + PairIter<TransAp> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + compareRes = FsmAp::comparePartPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeInS2: + compareRes = FsmAp::comparePartPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeOverlap: + compareRes = FsmAp::comparePartPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + return 0; +} + +/* Compare class for the sort that does the partitioning. */ +bool MarkCompare::shouldMark( MarkIndex &markIndex, const StateAp *state1, + const StateAp *state2 ) +{ + /* Use a pair iterator to get the transition pairs. */ + PairIter<TransAp> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + if ( FsmAp::shouldMarkPtr( markIndex, outPair.s1Tel.trans, 0 ) ) + return true; + break; + + case RangeInS2: + if ( FsmAp::shouldMarkPtr( markIndex, 0, outPair.s2Tel.trans ) ) + return true; + break; + + case RangeOverlap: + if ( FsmAp::shouldMarkPtr( markIndex, + outPair.s1Tel.trans, outPair.s2Tel.trans ) ) + return true; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + return false; +} + +/* + * Transition Comparison. + */ + +/* Compare target partitions. Either pointer may be null. */ +int FsmAp::comparePartPtr( TransAp *trans1, TransAp *trans2 ) +{ + if ( trans1 != 0 ) { + /* If trans1 is set then so should trans2. The initial partitioning + * guarantees this for us. 
*/ + if ( trans1->toState == 0 && trans2->toState != 0 ) + return -1; + else if ( trans1->toState != 0 && trans2->toState == 0 ) + return 1; + else if ( trans1->toState != 0 ) { + /* Both of targets are set. */ + return CmpOrd< MinPartition* >::compare( + trans1->toState->alg.partition, trans2->toState->alg.partition ); + } + } + return 0; +} + + +/* Compares two transition pointers according to priority and functions. + * Either pointer may be null. Does not consider to state or from state. */ +int FsmAp::compareDataPtr( TransAp *trans1, TransAp *trans2 ) +{ + if ( trans1 == 0 && trans2 != 0 ) + return -1; + else if ( trans1 != 0 && trans2 == 0 ) + return 1; + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. */ + int compareRes = compareTransData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + return 0; +} + +/* Compares two transitions according to target state, priority and functions. + * Does not consider from state. Either of the pointers may be null. */ +int FsmAp::compareFullPtr( TransAp *trans1, TransAp *trans2 ) +{ + if ( (trans1 != 0) ^ (trans2 != 0) ) { + /* Exactly one of the transitions is set. */ + if ( trans1 != 0 ) + return -1; + else + return 1; + } + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. Test target state, + * priority and funcs. */ + if ( trans1->toState < trans2->toState ) + return -1; + else if ( trans1->toState > trans2->toState ) + return 1; + else if ( trans1->toState != 0 ) { + /* Test transition data. */ + int compareRes = compareTransData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + } + return 0; +} + + +bool FsmAp::shouldMarkPtr( MarkIndex &markIndex, TransAp *trans1, + TransAp *trans2 ) +{ + if ( (trans1 != 0) ^ (trans2 != 0) ) { + /* Exactly one of the transitions is set. The initial mark round + * should rule out this case. */ + assert( false ); + } + else if ( trans1 != 0 ) { + /* Both of the transitions are set. 
If the target pair is marked, then + * the pair we are considering gets marked. */ + return markIndex.isPairMarked( trans1->toState->alg.stateNum, + trans2->toState->alg.stateNum ); + } + + /* Neither of the transitiosn are set. */ + return false; +} + + diff --git a/contrib/tools/ragel5/ragel/main.cpp b/contrib/tools/ragel5/ragel/main.cpp new file mode 100644 index 0000000000..a22a34f1b0 --- /dev/null +++ b/contrib/tools/ragel5/ragel/main.cpp @@ -0,0 +1,355 @@ +/* + * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <iostream> +#include <fstream> +#ifndef _WIN32 +# include <unistd.h> +#endif +#include <sstream> + +/* Parsing. */ +#include "ragel.h" +#include "rlscan.h" + +/* Parameters and output. */ +#include "pcheck.h" +#include "vector.h" +#include "version.h" +#include "common.h" + +#ifdef _MSC_VER +# define strncasecmp _strnicmp
+# define strcasecmp _stricmp
+#endif + +using std::istream; +using std::ostream; +using std::ifstream; +using std::ofstream; +using std::cin; +using std::cout; +using std::cerr; +using std::endl; + +/* Controls minimization. */ +MinimizeLevel minimizeLevel = MinimizePartition2; +MinimizeOpt minimizeOpt = MinimizeMostOps; + +/* Graphviz dot file generation. */ +char *machineSpec = 0, *machineName = 0; +bool machineSpecFound = false; + +bool printStatistics = false; + +/* Print a summary of the options. */ +void usage() +{ + cout << +"usage: ragel [options] file\n" +"general:\n" +" -h, -H, -?, --help Print this usage and exit\n" +" -v, --version Print version information and exit\n" +" -o <file> Write output to <file>\n" +" -s Print some statistics on stderr\n" +"fsm minimization:\n" +" -n Do not perform minimization\n" +" -m Minimize at the end of the compilation\n" +" -l Minimize after most operations (default)\n" +" -e Minimize after every operation\n" +"machine selection:\n" +" -S <spec> FSM specification to output for -V\n" +" -M <machine> Machine definition/instantiation to output for -V\n" +"host language:\n" +" -C The host language is C, C++, Obj-C or Obj-C++ (default)\n" +" -D The host language is D\n" +" -J The host language is Java\n" +" -R The host language is Ruby\n" + ; +} + +/* Print version information. */ +void version() +{ + cout << "Ragel State Machine Compiler version " VERSION << " " PUBDATE << endl << + "Copyright (c) 2001-2006 by Adrian Thurston" << endl; +} + +/* Total error count. */ +int gblErrorCount = 0; + +/* Print the opening to a warning in the input, then return the error ostream. */ +ostream &warning( const InputLoc &loc ) +{ + assert( loc.fileName != 0 ); + cerr << loc.fileName << ":" << loc.line << ":" << + loc.col << ": warning: "; + return cerr; +} + +/* Print the opening to a program error, then return the error stream. 
*/ +ostream &error() +{ + gblErrorCount += 1; + cerr << PROGNAME ": "; + return cerr; +} + +ostream &error( const InputLoc &loc ) +{ + gblErrorCount += 1; + assert( loc.fileName != 0 ); + cerr << loc.fileName << ":" << loc.line << ": "; + return cerr; +} + +void escapeLineDirectivePath( std::ostream &out, char *path ) +{ + for ( char *pc = path; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else + out << *pc; + } +} + +/* Main, process args and call yyparse to start scanning input. */ +int main(int argc, char **argv) +{ + ParamCheck pc("o:nmleabjkS:M:CDJRvHh?-:s", argc, argv); + char *inputFileName = 0; + char inputFileNameArr[] = "<stdin>"; + char *outputFileName = 0; + + while ( pc.check() ) { + switch ( pc.state ) { + case ParamCheck::match: + switch ( pc.parameter ) { + /* Output. */ + case 'o': + if ( *pc.parameterArg == 0 ) + error() << "a zero length output file name was given" << endl; + else if ( outputFileName != 0 ) + error() << "more than one output file name was given" << endl; + else { + /* Ok, remember the output file name. */ + outputFileName = pc.parameterArg; + } + break; + + /* Minimization, mostly hidden options. */ + case 'n': + minimizeOpt = MinimizeNone; + break; + case 'm': + minimizeOpt = MinimizeEnd; + break; + case 'l': + minimizeOpt = MinimizeMostOps; + break; + case 'e': + minimizeOpt = MinimizeEveryOp; + break; + case 'a': + minimizeLevel = MinimizeApprox; + break; + case 'b': + minimizeLevel = MinimizeStable; + break; + case 'j': + minimizeLevel = MinimizePartition1; + break; + case 'k': + minimizeLevel = MinimizePartition2; + break; + + /* Machine spec. */ + case 'S': + if ( *pc.parameterArg == 0 ) + error() << "please specify an argument to -S" << endl; + else if ( machineSpec != 0 ) + error() << "more than one -S argument was given" << endl; + else { + /* Ok, remember the path to the machine to generate. */ + machineSpec = pc.parameterArg; + } + break; + + /* Machine path. 
*/ + case 'M': + if ( *pc.parameterArg == 0 ) + error() << "please specify an argument to -M" << endl; + else if ( machineName != 0 ) + error() << "more than one -M argument was given" << endl; + else { + /* Ok, remember the machine name to generate. */ + machineName = pc.parameterArg; + } + break; + + /* Host language types. */ + case 'C': + hostLangType = CCode; + hostLang = &hostLangC; + break; + case 'D': + hostLangType = DCode; + hostLang = &hostLangD; + break; + case 'J': + hostLangType = JavaCode; + hostLang = &hostLangJava; + break; + case 'R': + hostLangType = RubyCode; + hostLang = &hostLangRuby; + break; + + /* Version and help. */ + case 'v': + version(); + exit(0); + case 'H': case 'h': case '?': + usage(); + exit(0); + case 's': + printStatistics = true; + break; + case '-': + if ( strcasecmp(pc.parameterArg, "help") == 0 ) { + usage(); + exit(0); + } + else if ( strcasecmp(pc.parameterArg, "version") == 0 ) { + version(); + exit(0); + } + else { + error() << "--" << pc.parameterArg << + " is an invalid argument" << endl; + } + } + break; + + case ParamCheck::invalid: + error() << "-" << pc.parameter << " is an invalid argument" << endl; + break; + + case ParamCheck::noparam: + /* It is interpreted as an input file. */ + if ( *pc.curArg == 0 ) + error() << "a zero length input file name was given" << endl; + else if ( inputFileName != 0 ) + error() << "more than one input file name was given" << endl; + else { + /* OK, Remember the filename. */ + inputFileName = pc.curArg; + } + break; + } + } + + /* Bail on above errors. */ + if ( gblErrorCount > 0 ) + exit(1); + + /* Make sure we are not writing to the same file as the input file. */ + if ( inputFileName != 0 && outputFileName != 0 && + strcmp( inputFileName, outputFileName ) == 0 ) + { + error() << "output file \"" << outputFileName << + "\" is the same as the input file" << endl; + } + + /* Open the input file for reading. 
*/ + istream *inStream; + if ( inputFileName != 0 ) { + /* Open the input file for reading. */ + ifstream *inFile = new ifstream( inputFileName ); + inStream = inFile; + if ( ! inFile->is_open() ) + error() << "could not open " << inputFileName << " for reading" << endl; + } + else { + inStream = &cin; + } + + + /* Bail on above errors. */ + if ( gblErrorCount > 0 ) + exit(1); + + std::ostringstream outputBuffer; + + if ( machineSpec == 0 && machineName == 0 ) + outputBuffer << "<host line=\"1\" col=\"1\">"; + +#if defined _WIN32 || defined _WIN64 + if (inputFileName != 0) { + NormalizeWinPath(inputFileName); + } +#endif + if (inputFileName == 0) { + inputFileName = inputFileNameArr; + } + + if (strrchr(inputFileName, '/') == NULL) { + error() << "input file path should be absolute: " << inputFileName << endl; + exit(1); + } + + Scanner scanner( inputFileName, *inStream, outputBuffer, 0, 0, 0, false ); + scanner.do_scan(); + + /* Finished, final check for errors.. */ + if ( gblErrorCount > 0 ) + return 1; + + /* Now send EOF to all parsers. */ + terminateAllParsers(); + + /* Finished, final check for errors.. */ + if ( gblErrorCount > 0 ) + return 1; + + if ( machineSpec == 0 && machineName == 0 ) + outputBuffer << "</host>\n"; + + if ( gblErrorCount > 0 ) + return 1; + + ostream *outputFile = 0; + if ( outputFileName != 0 ) + outputFile = new ofstream( outputFileName ); + else + outputFile = &cout; + + /* Write the machines, then the surrounding code. */ + writeMachines( *outputFile, outputBuffer.str(), inputFileName ); + + if ( outputFileName != 0 ) + delete outputFile; + + return 0; +} diff --git a/contrib/tools/ragel5/ragel/parsedata.cpp b/contrib/tools/ragel5/ragel/parsedata.cpp new file mode 100644 index 0000000000..3e14cc618a --- /dev/null +++ b/contrib/tools/ragel5/ragel/parsedata.cpp @@ -0,0 +1,1505 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. 
+ * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <iomanip> +#include <errno.h> +#include <stdlib.h> +#include <limits.h> + +#include "ragel.h" +#include "rlparse.h" +#include "parsedata.h" +#include "parsetree.h" +#include "mergesort.h" +#include "xmlcodegen.h" + +using namespace std; + +char mainMachine[] = "main"; + +void Token::set(const char *str, int len ) +{ + length = len; + data = new char[len+1]; + memcpy( data, str, len ); + data[len] = 0; +} + +void Token::append( const Token &other ) +{ + int newLength = length + other.length; + char *newString = new char[newLength+1]; + memcpy( newString, data, length ); + memcpy( newString + length, other.data, other.length ); + newString[newLength] = 0; + data = newString; + length = newLength; +} + +/* Perform minimization after an operation according + * to the command line args. */ +void afterOpMinimize( FsmAp *fsm, bool lastInSeq ) +{ + /* Switch on the prefered minimization algorithm. */ + if ( minimizeOpt == MinimizeEveryOp || minimizeOpt == MinimizeMostOps && lastInSeq ) { + /* First clean up the graph. FsmAp operations may leave these + * lying around. There should be no dead end states. The subtract + * intersection operators are the only places where they may be + * created and those operators clean them up. 
*/ + fsm->removeUnreachableStates(); + + switch ( minimizeLevel ) { + case MinimizeApprox: + fsm->minimizeApproximate(); + break; + case MinimizePartition1: + fsm->minimizePartition1(); + break; + case MinimizePartition2: + fsm->minimizePartition2(); + break; + case MinimizeStable: + fsm->minimizeStable(); + break; + } + } +} + +/* Count the transitions in the fsm by walking the state list. */ +int countTransitions( FsmAp *fsm ) +{ + int numTrans = 0; + StateAp *state = fsm->stateList.head; + while ( state != 0 ) { + numTrans += state->outList.length(); + state = state->next; + } + return numTrans; +} + +Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd ) +{ + /* Reset errno so we can check for overflow or underflow. In the event of + * an error, sets the return val to the upper or lower bound being tested + * against. */ + errno = 0; + unsigned int size = keyOps->alphType->size; + bool unusedBits = size < sizeof(unsigned long); + + unsigned long ul = strtoul( str, 0, 16 ); + + if ( errno == ERANGE || unusedBits && ul >> (size * 8) ) { + error(loc) << "literal " << str << " overflows the alphabet type" << endl; + ul = 1 << (size * 8); + } + + if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) ) + ul |= (0xffffffff >> (size*8 ) ) << (size*8); + + return Key( (long)ul ); +} + +#ifdef _MSC_VER +# define strtoll _strtoi64 +#endif + +Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd ) +{ + /* Convert the number to a decimal. First reset errno so we can check + * for overflow or underflow. */ + errno = 0; + long long minVal = keyOps->alphType->minVal; + long long maxVal = keyOps->alphType->maxVal; + + long long ll = strtoll( str, 0, 10 ); + + /* Check for underflow. */ + if ( errno == ERANGE && ll < 0 || ll < minVal) { + error(loc) << "literal " << str << " underflows the alphabet type" << endl; + ll = minVal; + } + /* Check for overflow. 
*/ + else if ( errno == ERANGE && ll > 0 || ll > maxVal ) { + error(loc) << "literal " << str << " overflows the alphabet type" << endl; + ll = maxVal; + } + + if ( keyOps->alphType->isSigned ) + return Key( (long)ll ); + else + return Key( (unsigned long)ll ); +} + +/* Make an fsm key in int format (what the fsm graph uses) from an alphabet + * number returned by the parser. Validates that the number doesn't overflow + * the alphabet type. */ +Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd ) +{ + /* Switch on hex/decimal format. */ + if ( str[0] == '0' && str[1] == 'x' ) + return makeFsmKeyHex( str, loc, pd ); + else + return makeFsmKeyDec( str, loc, pd ); +} + +/* Make an fsm int format (what the fsm graph uses) from a single character. + * Performs proper conversion depending on signed/unsigned property of the + * alphabet. */ +Key makeFsmKeyChar( char c, ParseData *pd ) +{ + if ( keyOps->isSigned ) { + /* Copy from a char type. */ + return Key( c ); + } + else { + /* Copy from an unsigned byte type. */ + return Key( (unsigned char)c ); + } +} + +/* Make an fsm key array in int format (what the fsm graph uses) from a string + * of characters. Performs proper conversion depending on signed/unsigned + * property of the alphabet. */ +void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd ) +{ + if ( keyOps->isSigned ) { + /* Copy from a char star type. */ + char *src = data; + for ( int i = 0; i < len; i++ ) + result[i] = Key(src[i]); + } + else { + /* Copy from an unsigned byte ptr type. */ + unsigned char *src = (unsigned char*) data; + for ( int i = 0; i < len; i++ ) + result[i] = Key(src[i]); + } +} + +/* Like makeFsmKeyArray except the result has only unique keys. They ordering + * will be changed. */ +void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, + bool caseInsensitive, ParseData *pd ) +{ + /* Use a transitions list for getting unique keys. */ + if ( keyOps->isSigned ) { + /* Copy from a char star type. 
*/ + char *src = data; + for ( int si = 0; si < len; si++ ) { + Key key( src[si] ); + result.insert( key ); + if ( caseInsensitive ) { + if ( key.isLower() ) + result.insert( key.toUpper() ); + else if ( key.isUpper() ) + result.insert( key.toLower() ); + } + } + } + else { + /* Copy from an unsigned byte ptr type. */ + unsigned char *src = (unsigned char*) data; + for ( int si = 0; si < len; si++ ) { + Key key( src[si] ); + result.insert( key ); + if ( caseInsensitive ) { + if ( key.isLower() ) + result.insert( key.toUpper() ); + else if ( key.isUpper() ) + result.insert( key.toLower() ); + } + } + } +} + +FsmAp *dotFsm( ParseData *pd ) +{ + FsmAp *retFsm = new FsmAp(); + retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey ); + return retFsm; +} + +FsmAp *dotStarFsm( ParseData *pd ) +{ + FsmAp *retFsm = new FsmAp(); + retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey ); + return retFsm; +} + +/* Make a builtin type. Depends on the signed nature of the alphabet type. */ +FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd ) +{ + /* FsmAp created to return. */ + FsmAp *retFsm = 0; + bool isSigned = keyOps->isSigned; + + switch ( builtin ) { + case BT_Any: { + /* All characters. */ + retFsm = dotFsm( pd ); + break; + } + case BT_Ascii: { + /* Ascii characters 0 to 127. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( 0, 127 ); + break; + } + case BT_Extend: { + /* Ascii extended characters. This is the full byte range. Dependent + * on signed, vs no signed. If the alphabet is one byte then just use + * dot fsm. */ + if ( isSigned ) { + retFsm = new FsmAp(); + retFsm->rangeFsm( -128, 127 ); + } + else { + retFsm = new FsmAp(); + retFsm->rangeFsm( 0, 255 ); + } + break; + } + case BT_Alpha: { + /* Alpha [A-Za-z]. */ + FsmAp *upper = new FsmAp(), *lower = new FsmAp(); + upper->rangeFsm( 'A', 'Z' ); + lower->rangeFsm( 'a', 'z' ); + upper->unionOp( lower ); + upper->minimizePartition2(); + retFsm = upper; + break; + } + case BT_Digit: { + /* Digits [0-9]. 
*/ + retFsm = new FsmAp(); + retFsm->rangeFsm( '0', '9' ); + break; + } + case BT_Alnum: { + /* Alpha numerics [0-9A-Za-z]. */ + FsmAp *digit = new FsmAp(), *lower = new FsmAp(); + FsmAp *upper = new FsmAp(); + digit->rangeFsm( '0', '9' ); + upper->rangeFsm( 'A', 'Z' ); + lower->rangeFsm( 'a', 'z' ); + digit->unionOp( upper ); + digit->unionOp( lower ); + digit->minimizePartition2(); + retFsm = digit; + break; + } + case BT_Lower: { + /* Lower case characters. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( 'a', 'z' ); + break; + } + case BT_Upper: { + /* Upper case characters. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( 'A', 'Z' ); + break; + } + case BT_Cntrl: { + /* Control characters. */ + FsmAp *cntrl = new FsmAp(); + FsmAp *highChar = new FsmAp(); + cntrl->rangeFsm( 0, 31 ); + highChar->concatFsm( 127 ); + cntrl->unionOp( highChar ); + cntrl->minimizePartition2(); + retFsm = cntrl; + break; + } + case BT_Graph: { + /* Graphical ascii characters [!-~]. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( '!', '~' ); + break; + } + case BT_Print: { + /* Printable characters. Same as graph except includes space. */ + retFsm = new FsmAp(); + retFsm->rangeFsm( ' ', '~' ); + break; + } + case BT_Punct: { + /* Punctuation. */ + FsmAp *range1 = new FsmAp(); + FsmAp *range2 = new FsmAp(); + FsmAp *range3 = new FsmAp(); + FsmAp *range4 = new FsmAp(); + range1->rangeFsm( '!', '/' ); + range2->rangeFsm( ':', '@' ); + range3->rangeFsm( '[', '`' ); + range4->rangeFsm( '{', '~' ); + range1->unionOp( range2 ); + range1->unionOp( range3 ); + range1->unionOp( range4 ); + range1->minimizePartition2(); + retFsm = range1; + break; + } + case BT_Space: { + /* Whitespace: [\t\v\f\n\r ]. */ + FsmAp *cntrl = new FsmAp(); + FsmAp *space = new FsmAp(); + cntrl->rangeFsm( '\t', '\r' ); + space->concatFsm( ' ' ); + cntrl->unionOp( space ); + cntrl->minimizePartition2(); + retFsm = cntrl; + break; + } + case BT_Xdigit: { + /* Hex digits [0-9A-Fa-f]. 
*/ + FsmAp *digit = new FsmAp(); + FsmAp *upper = new FsmAp(); + FsmAp *lower = new FsmAp(); + digit->rangeFsm( '0', '9' ); + upper->rangeFsm( 'A', 'F' ); + lower->rangeFsm( 'a', 'f' ); + digit->unionOp( upper ); + digit->unionOp( lower ); + digit->minimizePartition2(); + retFsm = digit; + break; + } + case BT_Lambda: { + retFsm = new FsmAp(); + retFsm->lambdaFsm(); + break; + } + case BT_Empty: { + retFsm = new FsmAp(); + retFsm->emptyFsm(); + break; + }} + + return retFsm; +} + +/* Check if this name inst or any name inst below is referenced. */ +bool NameInst::anyRefsRec() +{ + if ( numRefs > 0 ) + return true; + + /* Recurse on children until true. */ + for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) { + if ( (*ch)->anyRefsRec() ) + return true; + } + + return false; +} + +/* + * ParseData + */ + +/* Initialize the structure that will collect info during the parse of a + * machine. */ +ParseData::ParseData(const char *fileName, char *sectionName, + const InputLoc §ionLoc ) +: + sectionGraph(0), + generatingSectionSubset(false), + nextPriorKey(0), + /* 0 is reserved for global error actions. */ + nextLocalErrKey(1), + nextNameId(0), + nextCondId(0), + alphTypeSet(false), + getKeyExpr(0), + accessExpr(0), + curStateExpr(0), + lowerNum(0), + upperNum(0), + fileName(fileName), + sectionName(sectionName), + sectionLoc(sectionLoc), + errorCount(0), + curActionOrd(0), + curPriorOrd(0), + rootName(0), + exportsRootName(0), + nextEpsilonResolvedLink(0), + nextLongestMatchId(1), + lmRequiresErrorState(false) +{ + /* Initialize the dictionary of graphs. This is our symbol table. The + * initialization needs to be done on construction which happens at the + * beginning of a machine spec so any assignment operators can reference + * the builtins. */ + initGraphDict(); +} + +/* Clean up the data collected during a parse. */ +ParseData::~ParseData() +{ + /* Delete all the nodes in the action list. 
Will cause all the + * string data that represents the actions to be deallocated. */ + actionList.empty(); +} + +/* Create a new name instance as a child of the current name scope. A named + * instance (data != 0) is also entered in the scope's children map. */ +NameInst *ParseData::addNameInst( const InputLoc &loc, const char *data, bool isLabel ) +{ + /* Create the name instantiation object and insert it. */ + NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel ); + curNameInst->childVect.append( newNameInst ); + if ( data != 0 ) + curNameInst->children.insertMulti( data, newNameInst ); + return newNameInst; +} + +/* Start a name walk at the root of the main name tree, at its first child. */ void ParseData::initNameWalk() +{ + curNameInst = rootName; + curNameChild = 0; +} + +/* Start a name walk at the root of the exports name tree, at its first child. */ void ParseData::initExportsNameWalk() +{ + curNameInst = exportsRootName; + curNameChild = 0; +} + +/* Goes into the next child scope. The number of the child is already set up. + * We need this for the synchronous name tree and parse tree walk to work + * properly. It is reset on entry into a scope and advanced on popping of a + * scope. A call to enterNameScope should be accompanied by a corresponding + * popNameScope. Descends numScopes levels and returns the state that was saved before descending. */ +NameFrame ParseData::enterNameScope( bool isLocal, int numScopes ) +{ + /* Save off the current data. */ + NameFrame retFrame; + retFrame.prevNameInst = curNameInst; + retFrame.prevNameChild = curNameChild; + retFrame.prevLocalScope = localNameScope; + + /* Enter into the new name scope. */ + for ( int i = 0; i < numScopes; i++ ) { + curNameInst = curNameInst->childVect[curNameChild]; + curNameChild = 0; + } + + if ( isLocal ) + localNameScope = curNameInst; + + return retFrame; +} + +/* Return from a child scope to a parent. The parent info must be specified as + * an argument and is obtained from the corresponding call to enterNameScope. + * */ +void ParseData::popNameScope( const NameFrame &frame ) +{ + /* Pop the name scope.
*/ + curNameInst = frame.prevNameInst; + curNameChild = frame.prevNameChild+1; + localNameScope = frame.prevLocalScope; +} + +/* Restore the scope saved in frame. Unlike popNameScope the child index is not advanced. */ void ParseData::resetNameScope( const NameFrame &frame ) +{ + /* Restore the saved name scope as it was. */ + curNameInst = frame.prevNameInst; + curNameChild = frame.prevNameChild; + localNameScope = frame.prevLocalScope; +} + + +void ParseData::unsetObsoleteEntries( FsmAp *graph ) +{ + /* Loop the reference names and increment the usage. Names that are no + * longer needed will be unset in graph. */ + for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) { + /* Get the name. */ + NameInst *name = *ref; + name->numUses += 1; + + /* If the name is no longer needed unset its corresponding entry. */ + if ( name->numUses == name->numRefs ) { + assert( graph->entryPoints.find( name->id ) != 0 ); + graph->unsetEntry( name->id ); + assert( graph->entryPoints.find( name->id ) == 0 ); + } + } +} + +/* Breadth-first search below refFrom for every name instance called data. When recLabelsOnly is set only label instances are descended into. */ NameSet ParseData::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly ) +{ + /* Queue needed for breadth-first search, load it with the start node. */ + NameInstList nameQueue; + nameQueue.append( refFrom ); + + NameSet result; + while ( nameQueue.length() > 0 ) { + /* Pull the next from location off the queue. */ + NameInst *from = nameQueue.detachFirst(); + + /* Look for the name. */ + NameMapEl *low, *high; + if ( from->children.findMulti( data, low, high ) ) { + /* Record all instances of the name. */ + for ( ; low <= high; low++ ) + result.insert( low->value ); + } + + /* Found or not, continue the breadth-first search by appending the + * children to the processing queue. */ + for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) { + if ( !recLabelsOnly || (*name)->isLabel ) + nameQueue.append( *name ); + } + } + + /* Queue exhausted; result holds every match found (possibly none).
*/ + return result; +} + +/* Resolve nameRef starting at component namePos, searching below refFrom. Every instance matching the full remaining path is added to result. */ void ParseData::resolveFrom( NameSet &result, NameInst *refFrom, + const NameRef &nameRef, int namePos ) +{ + /* Look for the name in the owning scope of the factor with aug. */ + NameSet partResult = resolvePart( refFrom, nameRef[namePos], false ); + + /* If there are more parts to the name then continue on. */ + if ( ++namePos < nameRef.length() ) { + /* There are more components to the name, search using all the part + * results as the base. */ + for ( NameSet::Iter name = partResult; name.lte(); name++ ) + resolveFrom( result, *name, nameRef, namePos ); + } + else { + /* This is the last component, append the part results to the final + * results. */ + result.insert( partResult ); + } +} + +/* Write out a name reference. A null first component is printed as a leading "::". */ +ostream &operator<<( ostream &out, const NameRef &nameRef ) +{ + int pos = 0; + if ( nameRef[pos] == 0 ) { + out << "::"; + pos += 1; + } + out << nameRef[pos++]; + for ( ; pos < nameRef.length(); pos++ ) + out << "::" << nameRef[pos]; + return out; +} + +/* Write out the fully qualified name of a name instance; unnamed parts print as <ANON>. */ ostream &operator<<( ostream &out, const NameInst &nameInst ) +{ + /* Count the number fully qualified name parts. */ + int numParents = 0; + NameInst *curParent = nameInst.parent; + while ( curParent != 0 ) { + numParents += 1; + curParent = curParent->parent; + } + + /* Make an array and fill it in. */ + curParent = nameInst.parent; + NameInst **parents = new NameInst*[numParents]; + for ( int p = numParents-1; p >= 0; p-- ) { + parents[p] = curParent; + curParent = curParent->parent; + } + + /* Write the parents out, skip the root. */ + for ( int p = 1; p < numParents; p++ ) + out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" ); + + /* Write the name and cleanup. */ + out << "::" << ( nameInst.name != 0 ?
nameInst.name : "<ANON>" ); + delete[] parents; + return out; +} + +/* Orders name instances by source location: line first, then column. */ struct CmpNameInstLoc +{ + static int compare( const NameInst *ni1, const NameInst *ni2 ) + { + if ( ni1->loc.line < ni2->loc.line ) + return -1; + else if ( ni1->loc.line > ni2->loc.line ) + return 1; + else if ( ni1->loc.col < ni2->loc.col ) + return -1; + else if ( ni1->loc.col > ni2->loc.col ) + return 1; + return 0; + } +}; + +/* Report each candidate of an ambiguous state reference as an error line. NOTE(review): the sort works on resolved.data directly, presumably reordering the set's storage by location - confirm against MergeSort. */ void errorStateLabels( const NameSet &resolved ) +{ + MergeSort<NameInst*, CmpNameInstLoc> mergeSort; + mergeSort.sort( resolved.data, resolved.length() ); + for ( NameSet::Iter res = resolved; res.lte(); res++ ) + error((*res)->loc) << " -> " << **res << endl; +} + + +/* Resolve a state reference to a name instance. Returns 0, after reporting an error, when nothing matches. When several instances match, an error is reported and the first one is returned. */ NameInst *ParseData::resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action ) +{ + NameInst *nameInst = 0; + + /* Do the local search if the name is not strictly a root level name + * search. */ + if ( nameRef[0] != 0 ) { + /* If the action is referenced, resolve all of them. */ + if ( action != 0 && action->actionRefs.length() > 0 ) { + /* Look for the name in all referencing scopes. */ + NameSet resolved; + for ( ActionRefs::Iter actRef = action->actionRefs; actRef.lte(); actRef++ ) + resolveFrom( resolved, *actRef, nameRef, 0 ); + + if ( resolved.length() > 0 ) { + /* Take the first one. */ + nameInst = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references. */ + error(loc) << "state reference " << nameRef << + " resolves to multiple entry points" << endl; + errorStateLabels( resolved ); + } + } + } + } + + /* If not found in the local scope, look in global. A root-qualified reference skips its null first component. */ + if ( nameInst == 0 ) { + NameSet resolved; + int fromPos = nameRef[0] != 0 ? 0 : 1; + resolveFrom( resolved, rootName, nameRef, fromPos ); + + if ( resolved.length() > 0 ) { + /* Take the first. */ + nameInst = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references.
*/ + error(loc) << "state reference " << nameRef << + " resolves to multiple entry points" << endl; + errorStateLabels( resolved ); + } + } + } + + if ( nameInst == 0 ) { + /* If not found then complain. */ + error(loc) << "could not resolve state reference " << nameRef << endl; + } + return nameInst; +} + +void ParseData::resolveNameRefs( InlineList *inlineList, Action *action ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Entry: case InlineItem::Goto: + case InlineItem::Call: case InlineItem::Next: { + /* Resolve, pass action for local search. */ + NameInst *target = resolveStateRef( *item->nameRef, item->loc, action ); + + /* Check if the target goes into a longest match. */ + NameInst *search = target->parent; + while ( search != 0 ) { + if ( search->isLongestMatch ) { + error(item->loc) << "cannot enter inside a longest " + "match construction as an entry point" << endl; + break; + } + search = search->parent; + } + + /* Note the reference in the name. This will cause the entry + * point to survive to the end of the graph generating walk. */ + if ( target != 0 ) + target->numRefs += 1; + item->nameTarg = target; + break; + } + default: + break; + } + + /* Some of the item types may have children. */ + if ( item->children != 0 ) + resolveNameRefs( item->children, action ); + } +} + +/* Resolve references to labels in actions. */ +void ParseData::resolveActionNameRefs() +{ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Only care about the actions that are referenced. */ + if ( act->actionRefs.length() > 0 ) + resolveNameRefs( act->inlineList, act ); + } +} + +/* Walk a name tree starting at from and fill the name index. */ +void ParseData::fillNameIndex( NameInst *from ) +{ + /* Fill the value for from in the name index. */ + nameIndex[from->id] = from; + + /* Recurse on the implicit final state and then all children. 
*/ + if ( from->final != 0 ) + fillNameIndex( from->final ); + for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) + fillNameIndex( *name ); +} + +void ParseData::makeRootNames() +{ + /* Create the roots of the main name tree and of the exports name tree. */ + rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false ); + exportsRootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false ); +} + +/* Build the name tree and supporting data structures. With a non-null dictEl only that definition is walked, otherwise every entry of instanceList is. */ +void ParseData::makeNameTree( GraphDictEl *dictEl ) +{ + /* Set up curNameInst for the walk. */ + initNameWalk(); + + if ( dictEl != 0 ) { + /* A start location has been specified. */ + dictEl->value->makeNameTree( dictEl->loc, this ); + } + else { + /* First make the name tree. */ + for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) { + /* Recurse on the instance. */ + glel->value->makeNameTree( glel->loc, this ); + } + } + + /* The number of nodes in the tree can now be given by nextNameId */ + nameIndex = new NameInst*[nextNameId]; + memset( nameIndex, 0, sizeof(NameInst*)*nextNameId ); + fillNameIndex( rootName ); + fillNameIndex( exportsRootName ); +} + + +/* Wrap a builtin machine in an Expression/Join/JoinOrLm/VarDef chain and enter it in the graph dictionary under name. */ void ParseData::createBuiltin(const char *name, BuiltinMachine builtin ) +{ + Expression *expression = new Expression( builtin ); + Join *join = new Join( expression ); + JoinOrLm *joinOrLm = new JoinOrLm( join ); + VarDef *varDef = new VarDef( name, joinOrLm ); + GraphDictEl *graphDictEl = new GraphDictEl( name, varDef ); + graphDict.insert( graphDictEl ); +} + +/* Initialize the graph dict with builtin types.
*/ +void ParseData::initGraphDict( ) +{ + createBuiltin( "any", BT_Any ); + createBuiltin( "ascii", BT_Ascii ); + createBuiltin( "extend", BT_Extend ); + createBuiltin( "alpha", BT_Alpha ); + createBuiltin( "digit", BT_Digit ); + createBuiltin( "alnum", BT_Alnum ); + createBuiltin( "lower", BT_Lower ); + createBuiltin( "upper", BT_Upper ); + createBuiltin( "cntrl", BT_Cntrl ); + createBuiltin( "graph", BT_Graph ); + createBuiltin( "print", BT_Print ); + createBuiltin( "punct", BT_Punct ); + createBuiltin( "space", BT_Space ); + createBuiltin( "xdigit", BT_Xdigit ); + createBuiltin( "null", BT_Lambda ); + createBuiltin( "zlen", BT_Lambda ); + createBuiltin( "empty", BT_Empty ); +} + +/* Set the alphabet type from a two-word type name. If the types are not valid returns false. Note that alphTypeSet is set to true either way, while userAlphType is assigned only on a match. */ +bool ParseData::setAlphType( char *s1, char *s2 ) +{ + bool valid = false; + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 && + hostLang->hostTypes[i].data2 != 0 && + strcmp( s2, hostLang->hostTypes[i].data2 ) == 0 ) + { + valid = true; + userAlphType = hostLang->hostTypes + i; + break; + } + } + + alphTypeSet = true; + return valid; +} + +/* Set the alphabet type from a one-word type name (host types with no data2). If the types are not valid returns false. Note that alphTypeSet is set to true either way, while userAlphType is assigned only on a match. */ +bool ParseData::setAlphType( char *s1 ) +{ + bool valid = false; + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( strcmp( s1, hostLang->hostTypes[i].data1 ) == 0 && + hostLang->hostTypes[i].data2 == 0 ) + { + valid = true; + userAlphType = hostLang->hostTypes + i; + break; + } + } + + alphTypeSet = true; + return valid; +} + +/* Initialize the key operators object that will be referenced by all fsms + * created. */ +void ParseData::initKeyOps( ) +{ + /* Signedness and bounds. */ + HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType; + thisKeyOps.setAlphType( alphType ); + + if ( lowerNum != 0 ) { + /* If ranges are given then interpret the alphabet type.
*/ + thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this ); + thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this ); + } + + /* The first condition key is the one just past the largest alphabet key. */ thisCondData.nextCondKey = thisKeyOps.maxKey; + thisCondData.nextCondKey.increment(); +} + +/* Debug helper: print nameInst and, recursively, its children to cerr, one indent step per level. */ void ParseData::printNameInst( NameInst *nameInst, int level ) +{ + for ( int i = 0; i < level; i++ ) + cerr << " "; + cerr << (nameInst->name != 0 ? nameInst->name : "<ANON>") << + " id: " << nameInst->id << + " refs: " << nameInst->numRefs << + " uses: " << nameInst->numUses << endl; + for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ ) + printNameInst( *name, level+1 ); +} + +/* Remove duplicates of unique actions from an action table. The first occurrence of each action is kept. */ +void ParseData::removeDups( ActionTable &table ) +{ + /* Scan through the table looking for unique actions to + * remove duplicates of. */ + for ( int i = 0; i < table.length(); i++ ) { + /* Remove any duplicates ahead of i. r advances only when nothing was removed at r. */ + for ( int r = i+1; r < table.length(); ) { + if ( table[r].value == table[i].value ) + table.vremove(r); + else + r += 1; + } + } +} + +/* Remove duplicates from action lists. This operates on the transition, + * to-state, from-state and eof action lists and so should be called once all actions have been + * transferred to their final resting place. */ +void ParseData::removeActionDups( FsmAp *graph ) +{ + /* Loop all states. */ + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { + /* Loop all transitions.
*/ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) + removeDups( trans->actionTable ); + removeDups( state->toStateActionTable ); + removeDups( state->fromStateActionTable ); + removeDups( state->eofActionTable ); + } +} + +/* Create an action with a placeholder location ("<NONE>", line 1, column 1), mark it as referenced from the root name and add it to the action list. */ Action *ParseData::newAction(const char *name, InlineList *inlineList ) +{ + InputLoc loc; + loc.line = 1; + loc.col = 1; + loc.fileName = "<NONE>"; + + Action *action = new Action( loc, name, inlineList, nextCondId++ ); + action->actionRefs.append( rootName ); + actionList.append( action ); + return action; +} + +/* When the section has longest-match constructions, create the four internal actions they rely on and give each an ordering number. */ void ParseData::initLongestMatchData() +{ + if ( lmList.length() > 0 ) { + /* The initTokStart action resets the token start. */ + InlineList *il1 = new InlineList; + il1->append( new InlineItem( InputLoc(), InlineItem::LmInitTokStart ) ); + initTokStart = newAction( "initts", il1 ); + initTokStart->isLmAction = true; + + /* The initActId action gives act a default value. */ + InlineList *il4 = new InlineList; + il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) ); + initActId = newAction( "initact", il4 ); + initActId->isLmAction = true; + + /* The setTokStart action sets tokstart. */ + InlineList *il5 = new InlineList; + il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) ); + setTokStart = newAction( "tokstart", il5 ); + setTokStart->isLmAction = true; + + /* The setTokEnd action sets tokend. */ + InlineList *il3 = new InlineList; + il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) ); + setTokEnd = newAction( "tokend", il3 ); + setTokEnd->isLmAction = true; + + /* The action will also need an ordering: ahead of all user action + * embeddings. */ + initTokStartOrd = curActionOrd++; + initActIdOrd = curActionOrd++; + setTokStartOrd = curActionOrd++; + setTokEndOrd = curActionOrd++; + } +} + +/* After building the graph, do some extra processing to ensure the runtime + * data of the longest match operators is consistent.
*/ +void ParseData::setLongestMatchData( FsmAp *graph ) +{ + if ( lmList.length() > 0 ) { + /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry) + * init the tokstart. */ + for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) { + /* This is run after duplicates are removed, we must guard against + * inserting a duplicate. */ + ActionTable &actionTable = en->value->toStateActionTable; + if ( ! actionTable.hasAction( initTokStart ) ) + actionTable.setAction( initTokStartOrd, initTokStart ); + } + + /* Find the set of states that are the target of transitions with + * actions that have calls. These states will be targeted by fret + * statements. */ + StateSet states; + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + for ( ActionTable::Iter ati = trans->actionTable; ati.lte(); ati++ ) { + if ( ati->value->anyCall && trans->toState != 0 ) + states.insert( trans->toState ); + } + } + } + + + /* Init tokstart upon entering the above collected states. */ + for ( StateSet::Iter ps = states; ps.lte(); ps++ ) { + /* This is run after duplicates are removed, we must guard against + * inserting a duplicate. */ + ActionTable &actionTable = (*ps)->toStateActionTable; + if ( ! actionTable.hasAction( initTokStart ) ) + actionTable.setAction( initTokStartOrd, initTokStart ); + } + } +} + +/* Make the graph from a graph dict node. Does minimization and transition compression. */ +FsmAp *ParseData::makeInstance( GraphDictEl *gdNode ) +{ + /* Build the graph from a walk of the parse tree. */ + FsmAp *graph = gdNode->value->walk( this ); + + /* Resolve any labels that point to multiple states. Any labels that are + * still around are referenced only by gotos and calls and they need to be + * made into deterministic entry points. */ + graph->deterministicEntry(); + + /* + * All state construction is now complete. + */ + + /* Transfer global error actions.
*/ + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) + graph->transferErrorActions( state, 0 ); + + removeActionDups( graph ); + + /* Remove unreachable states. There should be no dead end states. The + * subtract and intersection operators are the only places where they may + * be created and those operators clean them up. */ + graph->removeUnreachableStates(); + + /* No more fsm operations are to be done. Action ordering numbers are + * no longer of use and will just hinder minimization. Clear them. */ + graph->nullActionKeys(); + + /* Transition priorities are no longer of use. We can clear them + * because they will just hinder minimization as well. Clear them. */ + graph->clearAllPriorities(); + + if ( minimizeOpt != MinimizeNone ) { + /* Minimize here even if we minimized at every op. Now that function + * keys have been cleared we may get a more minimal fsm. */ + switch ( minimizeLevel ) { + case MinimizeApprox: + graph->minimizeApproximate(); + break; + case MinimizeStable: + graph->minimizeStable(); + break; + case MinimizePartition1: + graph->minimizePartition1(); + break; + case MinimizePartition2: + graph->minimizePartition2(); + break; + } + } + + graph->compressTransitions(); + + return graph; +} + +/* Debug helper: dump the name tree below rootName and then the name index to cerr. */ void ParseData::printNameTree() +{ + /* Print the name instance map. */ + for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ ) + printNameInst( *name, 0 ); + + cerr << "name index:" << endl; + /* Show that the name index is correct. NOTE(review): assumes every slot below nextNameId is non-null - confirm. */ + for ( int ni = 0; ni < nextNameId; ni++ ) { + cerr << ni << ": "; + const char *name = nameIndex[ni]->name; + cerr << ( name != 0 ? name : "<ANON>" ) << endl; + } +} + +/* Build only the machine defined by gdNode. Name references in actions are left unresolved and generatingSectionSubset is set. */ FsmAp *ParseData::makeSpecific( GraphDictEl *gdNode ) +{ + /* Build the name tree and supporting data structures. */ + makeNameTree( gdNode ); + + /* Resolve name references from gdNode. */ + initNameWalk(); + gdNode->value->resolveNameRefs( this ); + + /* Do not resolve action references.
Since we are not building the entire + * graph there's a good chance that many name references will fail. This + * is okay since generating part of the graph is usually only done when + * inspecting the compiled machine. */ + + /* Same story for extern entry point references. */ + + /* Flag this case so that the XML code generator is aware that we haven't + * looked up name references in actions. It can then avoid segfaulting. */ + generatingSectionSubset = true; + + /* Just building the specified graph. */ + initNameWalk(); + FsmAp *mainGraph = makeInstance( gdNode ); + + return mainGraph; +} + +/* Build every instantiation in instanceList and combine them into one graph, which is returned. */ FsmAp *ParseData::makeAll() +{ + /* Build the name tree and supporting data structures. */ + makeNameTree( 0 ); + + /* Resolve name references in the tree. */ + initNameWalk(); + for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) + glel->value->resolveNameRefs( this ); + + /* Resolve action code name references. */ + resolveActionNameRefs(); + + /* Force name references to the top level instantiations. */ + for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ ) + (*inst)->numRefs += 1; + + FsmAp *mainGraph = 0; + FsmAp **graphs = new FsmAp*[instanceList.length()]; + int numOthers = 0; + + /* Make all the instantiations; the one whose key equals mainMachine becomes the main graph. */ + initNameWalk(); + for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) { + if ( strcmp( glel->key, mainMachine ) == 0 ) { + /* Main graph is always instantiated. */ + mainGraph = makeInstance( glel ); + } + else { + /* Instantiate and store in others array. */ + graphs[numOthers++] = makeInstance( glel ); + } + } + + /* No instance named mainMachine: use the last one built. NOTE(review): assumes instanceList is non-empty, otherwise this indexes graphs[-1] - confirm all callers guarantee that. */ if ( mainGraph == 0 ) + mainGraph = graphs[--numOthers]; + + if ( numOthers > 0 ) { + /* Add all the other graphs into main. */ + mainGraph->globOp( graphs, numOthers ); + } + + delete[] graphs; + return mainGraph; +} + +/* Scan an inline list and set action->anyCall when a Call or CallExpr item is found, recursing into longest-match part actions and child lists. */ void ParseData::analyzeAction( Action *action, InlineList *inlineList ) +{ + /* FIXME: Actions used as conditions should be very constrained.
*/ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr ) + action->anyCall = true; + + /* Need to recurse into longest match items. */ + if ( item->type == InlineItem::LmSwitch ) { + LongestMatch *lm = item->longestMatch; + for ( LmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) { + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + } + + /* These item types refer to a single longest match part; look inside its action too. */ if ( item->type == InlineItem::LmOnLast || + item->type == InlineItem::LmOnNext || + item->type == InlineItem::LmOnLagBehind ) + { + LongestMatchPart *lmi = item->longestMatchPart; + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + + /* Some item types carry a nested inline list. */ if ( item->children != 0 ) + analyzeAction( action, item->children ); + } +} + + +/* Check actions for bad uses of fsm directives. We don't go inside longest + * match items in actions created by ragel, since we just want the user + * actions. */ +void ParseData::checkInlineList( Action *act, InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* EOF checks.
*/ + if ( act->numEofRefs > 0 ) { + switch ( item->type ) { + case InlineItem::PChar: + error(item->loc) << "pointer to current element does not exist in " + "EOF action code" << endl; + break; + case InlineItem::Char: + error(item->loc) << "current element does not exist in " + "EOF action code" << endl; + break; + case InlineItem::Hold: + error(item->loc) << "changing the current element not possible in " + "EOF action code" << endl; + break; + case InlineItem::Exec: + error(item->loc) << "changing the current element not possible in " + "EOF action code" << endl; + break; + case InlineItem::Goto: case InlineItem::Call: + case InlineItem::Next: case InlineItem::GotoExpr: + case InlineItem::CallExpr: case InlineItem::NextExpr: + case InlineItem::Ret: + error(item->loc) << "changing the current state not possible in " + "EOF action code" << endl; + break; + default: + break; + } + } + + /* Recurse. */ + if ( item->children != 0 ) + checkInlineList( act, item->children ); + } +} + +/* Report misuse of fsm directives in one action: calls made from inside a longest match scope, then the EOF restrictions checked by checkInlineList. */ void ParseData::checkAction( Action *action ) +{ + /* Check for actions with calls that are embedded within a longest match + * machine. */ + if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) { + for ( ActionRefs::Iter ar = action->actionRefs; ar.lte(); ar++ ) { + NameInst *check = *ar; + while ( check != 0 ) { + if ( check->isLongestMatch ) { + error(action->loc) << "within a scanner, fcall is permitted" + " only in pattern actions" << endl; + break; + } + check = check->parent; + } + } + } + + checkInlineList( action, action->inlineList ); +} + + +/* Analyze every action, count how often each action and condition is referenced from the graph, then check the actions for misuse. */ void ParseData::analyzeGraph( FsmAp *graph ) +{ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) + analyzeAction( act, act->inlineList ); + + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + /* The transition list.
*/ + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ ) + at->value->numTransRefs += 1; + } + + for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ ) + at->value->numToStateRefs += 1; + + for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ ) + at->value->numFromStateRefs += 1; + + for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ ) + at->value->numEofRefs += 1; + + for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) { + for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ ) + (*sci)->numCondRefs += 1; + } + } + + /* Checks for bad usage of directives in action code. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) + checkAction( act ); +} + +/* Build the name tree for every exported definition, rooted at exportsRootName. */ void ParseData::makeExportsNameTree() +{ + /* Make a name tree for the exports. */ + initExportsNameWalk(); + + /* First make the name tree. */ + for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) { + if ( gdel->value->isExport ) { + /* Recurse on the instance. */ + gdel->value->makeNameTree( gdel->loc, this ); + } + } +} + +/* Build each exported definition and record its single-character key in exportList. */ void ParseData::makeExports() +{ + makeExportsNameTree(); + + /* Resolve name references in the tree. */ + initExportsNameWalk(); + for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) { + if ( gdel->value->isExport ) + gdel->value->resolveNameRefs( this ); + } + + /* Walk each exported definition and declare its export. */ + initExportsNameWalk(); + for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) { + /* Check if this var def is an export. */ + if ( gdel->value->isExport ) { + /* Build the graph from a walk of the parse tree. */ + FsmAp *graph = gdel->value->walk( this ); + + /* An export must be a machine that defines a single character.
*/ + if ( !graph->checkSingleCharMachine() ) { + error(gdel->loc) << "bad export machine, must define " + "a single character" << endl; + } + else { + /* Safe to extract the key and declare the export. */ + Key exportKey = graph->startState->outList.head->lowKey; + exportList.append( new Export( gdel->value->name, exportKey ) ); + } + } + } + +} + +/* Build and finalize the section graph. A null graphDictEl builds every instantiation, otherwise only that definition is built. Returns early, after the graph and exports are made, when errors have been counted. */ void ParseData::prepareMachineGen( GraphDictEl *graphDictEl ) +{ + beginProcessing(); + initKeyOps(); + makeRootNames(); + initLongestMatchData(); + + /* Make the graph, do minimization. */ + if ( graphDictEl == 0 ) + sectionGraph = makeAll(); + else + sectionGraph = makeSpecific( graphDictEl ); + + /* Compute exports from the export definitions. */ + makeExports(); + + /* If any errors have occurred in the input file then don't write anything. */ + if ( gblErrorCount > 0 ) + return; + + analyzeGraph( sectionGraph ); + + /* Depends on the graph analysis. */ + setLongestMatchData( sectionGraph ); + + /* Decide if an error state is necessary. + * 1. There is an error transition + * 2. There is a gap in the transitions + * 3. The longest match operator requires it. */ + if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() ) + sectionGraph->errState = sectionGraph->addState(); + + /* State numbers need to be assigned such that all final states have a + * larger state id number than all non-final states. This enables the + * first_final mechanism to function correctly. We also want states to be + * ordered in a predictable fashion. So we first apply a depth-first + * search, then do a stable sort by final state status, then assign + * numbers. */ + + sectionGraph->depthFirstOrdering(); + sectionGraph->sortStatesByFinal(); + sectionGraph->setStateNumbers( 0 ); +} + +/* Write the section graph to out using an XMLCodeGen; prints statistics to cerr when printStatistics is set. */ void ParseData::generateXML( ostream &out ) +{ + beginProcessing(); + + /* Make the generator. */ + XMLCodeGen codeGen( sectionName, this, sectionGraph, out ); + + /* Write out with it.
*/ + codeGen.writeXML(); + + if ( printStatistics ) { + cerr << "fsm name : " << sectionName << endl; + cerr << "num states: " << sectionGraph->stateList.length() << endl; + cerr << endl; + } +} + +/* Send eof to all parsers. */ +void terminateAllParsers( ) +{ + /* FIXME: a proper token is needed here. Suppose we should use the + * location of EOF in the last file that the parser was referenced in. */ + InputLoc loc; + loc.fileName = "<EOF>"; + loc.line = 0; + loc.col = 0; + for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ ) + pdel->value->token( loc, _eof, 0, 0 ); +} + +/* Write the lang="..." attribute for the current host language. A hostLangType not listed below yields an empty attribute value. */ void writeLanguage( std::ostream &out ) +{ + out << " lang=\""; + switch ( hostLangType ) { + case CCode: out << "C"; break; + case DCode: out << "D"; break; + case JavaCode: out << "Java"; break; + case RubyCode: out << "Ruby"; break; + } + out << "\""; + +} + +/* Prepare the requested machines and write them to out inside a <ragel> element, followed by hostData. Nothing is written when errors have been counted. */ void writeMachines( std::ostream &out, std::string hostData, const char *inputFileName ) +{ + if ( machineSpec == 0 && machineName == 0 ) { + /* No machine spec or machine name given. Generate everything. */ + for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) { + ParseData *pd = parser->value->pd; + if ( pd->instanceList.length() > 0 ) + pd->prepareMachineGen( 0 ); + } + + if ( gblErrorCount == 0 ) { + out << "<ragel filename=\"" << inputFileName << "\""; + writeLanguage( out ); + out << ">\n"; + for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) { + ParseData *pd = parser->value->pd; + if ( pd->instanceList.length() > 0 ) + pd->generateXML( out ); + } + out << hostData; + out << "</ragel>\n"; + } + } + else if ( parserDict.length() > 0 ) { + /* There is either a machine spec or machine name given. */ + ParseData *parseData = 0; + GraphDictEl *graphDictEl = 0; + + /* Traverse the sections, break out when we find a section/machine + * that matches the one specified.
*/ + for ( ParserDict::Iter parser = parserDict; parser.lte(); parser++ ) { + ParseData *checkPd = parser->value->pd; + if ( machineSpec == 0 || strcmp( checkPd->sectionName, machineSpec ) == 0 ) { + /* Stays 0 when no machine name was given; a null graphDictEl makes prepareMachineGen build every instantiation of the section. */ GraphDictEl *checkGdEl = 0; + if ( machineName == 0 || (checkGdEl = + checkPd->graphDict.find( machineName )) != 0 ) + { + /* Have a machine spec and/or machine name that matches + * the -M/-S options. */ + parseData = checkPd; + graphDictEl = checkGdEl; + break; + } + } + } + + if ( parseData == 0 ) + error() << "could not locate machine specified with -S and/or -M" << endl; + else { + /* Section/Machine to emit was found. Prepare and emit it. */ + parseData->prepareMachineGen( graphDictEl ); + if ( gblErrorCount == 0 ) { + out << "<ragel filename=\"" << inputFileName << "\""; + writeLanguage( out ); + out << ">\n"; + parseData->generateXML( out ); + out << hostData; + out << "</ragel>\n"; + } + } + } +} diff --git a/contrib/tools/ragel5/ragel/parsedata.h b/contrib/tools/ragel5/ragel/parsedata.h new file mode 100644 index 0000000000..2baa7373d2 --- /dev/null +++ b/contrib/tools/ragel5/ragel/parsedata.h @@ -0,0 +1,401 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
/* Graph dictionary element: one named graph definition.
 *
 * The element derives from both AvlTreeEl and DListEl, so the same object
 * can be linked into the tree-based GraphDict and the list-based GraphList
 * declared right after this struct. */
struct GraphDictEl
:
	public AvlTreeEl<GraphDictEl>,
	public DListEl<GraphDictEl>
{
	/* Element with a key only; the definition pointer starts out null. */
	GraphDictEl(const char *k )
		: key(k), value(0), isInstance(false) { }
	/* Element with a key and its definition. The parameter `value` shadows
	 * the member, so `value(value)` initialises the member from the
	 * argument. */
	GraphDictEl(const char *k, VarDef *value )
		: key(k), value(value), isInstance(false) { }

	/* Key accessor used for ordering/lookup. */
	const char *getKey() { return key; }

	/* Name of the graph. The pointer is stored, not copied. */
	const char *key;
	/* The definition bound to the name; null until one is supplied. */
	VarDef *value;
	/* Always false on construction.
	 * NOTE(review): presumably set when the definition is an instantiation;
	 * confirm against the parser. */
	bool isInstance;

	/* Location info of graph definition. Points to variable name of assignment. */
	InputLoc loc;
};
*/ +struct NameInst +{ + NameInst( const InputLoc &loc, NameInst *parent, const char *name, int id, bool isLabel ) : + loc(loc), parent(parent), name(name), id(id), isLabel(isLabel), + isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {} + + InputLoc loc; + + /* Keep parent pointers in the name tree to retrieve + * fully qulified names. */ + NameInst *parent; + + const char *name; + int id; + bool isLabel; + bool isLongestMatch; + + int numRefs; + int numUses; + + /* Names underneath us, excludes anonymous names. */ + NameMap children; + + /* All names underneath us in order of appearance. */ + NameVect childVect; + + /* Join scopes need an implicit "final" target. */ + NameInst *start, *final; + + /* During a fsm generation walk, lists the names that are referenced by + * epsilon operations in the current scope. After the link is made by the + * epsilon reference and the join operation is complete, the label can + * have its refcount decremented. Once there are no more references the + * entry point can be removed from the fsm returned. */ + NameVect referencedNames; + + /* Pointers for the name search queue. */ + NameInst *prev, *next; + + /* Check if this name inst or any name inst below is referenced. */ + bool anyRefsRec(); +}; + +typedef DList<NameInst> NameInstList; + +/* Stack frame used in walking the name tree. */ +struct NameFrame +{ + NameInst *prevNameInst; + int prevNameChild; + NameInst *prevLocalScope; +}; + +/* Class to collect information about the machine during the + * parse of input. */ +struct ParseData +{ + /* Create a new parse data object. This is done at the beginning of every + * fsm specification. */ + ParseData(const char *fileName, char *sectionName, const InputLoc §ionLoc ); + ~ParseData(); + + /* + * Setting up the graph dict. + */ + + /* Initialize a graph dict with the basic fsms. 
*/ + void initGraphDict(); + void createBuiltin(const char *name, BuiltinMachine builtin ); + + /* Make a name id in the current name instantiation scope if it is not + * already there. */ + NameInst *addNameInst( const InputLoc &loc, const char *data, bool isLabel ); + void makeRootNames(); + void makeNameTree( GraphDictEl *gdNode ); + void makeExportsNameTree(); + void fillNameIndex( NameInst *from ); + void printNameTree(); + + /* Increments the usage count on entry names. Names that are no longer + * needed will have their entry points unset. */ + void unsetObsoleteEntries( FsmAp *graph ); + + /* Resove name references in action code and epsilon transitions. */ + NameSet resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly ); + void resolveFrom( NameSet &result, NameInst *refFrom, + const NameRef &nameRef, int namePos ); + NameInst *resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action ); + void resolveNameRefs( InlineList *inlineList, Action *action ); + void resolveActionNameRefs(); + + /* Set the alphabet type. If type types are not valid returns false. */ + bool setAlphType( char *s1, char *s2 ); + bool setAlphType( char *s1 ); + + /* Unique actions. */ + void removeDups( ActionTable &actionTable ); + void removeActionDups( FsmAp *graph ); + + /* Dumping the name instantiation tree. */ + void printNameInst( NameInst *nameInst, int level ); + + /* Make the graph from a graph dict node. Does minimization. */ + FsmAp *makeInstance( GraphDictEl *gdNode ); + FsmAp *makeSpecific( GraphDictEl *gdNode ); + FsmAp *makeAll(); + + /* Checking the contents of actions. 
*/ + void checkAction( Action *action ); + void checkInlineList( Action *act, InlineList *inlineList ); + + void analyzeAction( Action *action, InlineList *inlineList ); + void analyzeGraph( FsmAp *graph ); + void makeExports(); + + void prepareMachineGen( GraphDictEl *graphDictEl ); + void generateXML( ostream &out ); + FsmAp *sectionGraph; + bool generatingSectionSubset; + + void initKeyOps(); + + /* + * Data collected during the parse. + */ + + /* Dictionary of graphs. Both instances and non-instances go here. */ + GraphDict graphDict; + + /* The list of instances. */ + GraphList instanceList; + + /* Dictionary of actions. Lets actions be defined and then referenced. */ + ActionDict actionDict; + + /* Dictionary of named priorities. */ + PriorDict priorDict; + + /* Dictionary of named local errors. */ + LocalErrDict localErrDict; + + /* List of actions. Will be pasted into a switch statement. */ + ActionList actionList; + + /* The id of the next priority name and label. */ + int nextPriorKey, nextLocalErrKey, nextNameId, nextCondId; + + /* The default priority number key for a machine. This is active during + * the parse of the rhs of a machine assignment. */ + int curDefPriorKey; + + int curDefLocalErrKey; + + /* Alphabet type. */ + HostType *userAlphType; + bool alphTypeSet; + + /* Element type and get key expression. */ + InlineList *getKeyExpr; + InlineList *accessExpr; + InlineList *curStateExpr; + + /* The alphabet range. */ + char *lowerNum, *upperNum; + Key lowKey, highKey; + InputLoc rangeLowLoc, rangeHighLoc; + + /* The name of the file the fsm is from, and the spec name. */ + const char *fileName; + char *sectionName; + InputLoc sectionLoc; + + /* Number of errors encountered parsing the fsm spec. */ + int errorCount; + + /* Counting the action and priority ordering. */ + int curActionOrd; + int curPriorOrd; + + /* Root of the name tree. One root is for the instantiated machines. The + * other root is for exported definitions. 
*/ + NameInst *rootName; + NameInst *exportsRootName; + + /* Name tree walking. */ + NameInst *curNameInst; + int curNameChild; + + /* The place where resolved epsilon transitions go. These cannot go into + * the parse tree because a single epsilon op can resolve more than once + * to different nameInsts if the machine it's in is used more than once. */ + NameVect epsilonResolvedLinks; + int nextEpsilonResolvedLink; + + /* Root of the name tree used for doing local name searches. */ + NameInst *localNameScope; + + void setLmInRetLoc( InlineList *inlineList ); + void initLongestMatchData(); + void setLongestMatchData( FsmAp *graph ); + void initNameWalk(); + void initExportsNameWalk(); + NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; } + NameFrame enterNameScope( bool isLocal, int numScopes ); + void popNameScope( const NameFrame &frame ); + void resetNameScope( const NameFrame &frame ); + + /* Make name ids to name inst pointers. */ + NameInst **nameIndex; + + /* Counter for assigning ids to longest match items. */ + int nextLongestMatchId; + bool lmRequiresErrorState; + + /* List of all longest match parse tree items. 
/* Data used by the parser specific to the current file. Supports the include
 * system, since a new parser is executed for each included file. */
struct InputData
{
	/* Record the file being parsed and, for an included file, the include
	 * specification and the including spec. All pointers are stored as
	 * given (no copies). The token position starts at line 1 with
	 * first_column 1 / last_column 0, and the input starts out active. */
	InputData( char *fileName, char *includeSpec, char *includeTo ) :
		pd(0), sectionName(0), defaultParseData(0),
		first_line(1), first_column(1),
		last_line(1), last_column(0),
		fileName(fileName), includeSpec(includeSpec),
		includeTo(includeTo), active(true)
		{}

	/* For collecting name references. */
	NameRef nameRef;
	NameRefList nameRefList;

	/* The parse data. For each fsm spec, the parser collects things that it parses
	 * in data structures in here. */
	ParseData *pd;

	/* Name of the current section and a default parse data object; both are
	 * null until assigned.
	 * NOTE(review): exact ownership/lifetime is not visible here. */
	char *sectionName;
	ParseData *defaultParseData;

	/* Line/column span bookkeeping, initialised by the constructor. */
	int first_line;
	int first_column;
	int last_line;
	int last_column;

	/* Name of the file this parser instance is reading. */
	char *fileName;

	/* If this is an included file, this contains the specification to search
	 * for. IncludeTo will contain the spec name that does the including. */
	char *includeSpec;
	char *includeTo;

	/* True on construction.
	 * NOTE(review): presumably toggled while skipping sections that are not
	 * being included; confirm in the parser. */
	bool active;
	/* Location of the current section; not set by the constructor. */
	InputLoc sectionLoc;
};
*/ +#include "ragel.h" +#include "rlparse.h" +#include "parsetree.h" + +using namespace std; +ostream &operator<<( ostream &out, const NameRef &nameRef ); +ostream &operator<<( ostream &out, const NameInst &nameInst ); + +/* Convert the literal string which comes in from the scanner into an array of + * characters with escapes and options interpreted. Also null terminates the + * string. Though this null termination should not be relied on for + * interpreting literals in the parser because the string may contain a + * literal string with \0 */ +void Token::prepareLitString( Token &result, bool &caseInsensitive ) +{ + result.data = new char[this->length+1]; + caseInsensitive = false; + + char *src = this->data + 1; + char *end = this->data + this->length - 1; + + while ( *end != '\'' && *end != '\"' ) { + if ( *end == 'i' ) + caseInsensitive = true; + else { + error( this->loc ) << "literal string '" << *end << + "' option not supported" << endl; + } + end -= 1; + } + + char *dest = result.data; + int len = 0; + while ( src != end ) { + if ( *src == '\\' ) { + switch ( src[1] ) { + case '0': dest[len++] = '\0'; break; + case 'a': dest[len++] = '\a'; break; + case 'b': dest[len++] = '\b'; break; + case 't': dest[len++] = '\t'; break; + case 'n': dest[len++] = '\n'; break; + case 'v': dest[len++] = '\v'; break; + case 'f': dest[len++] = '\f'; break; + case 'r': dest[len++] = '\r'; break; + case '\n': break; + default: dest[len++] = src[1]; break; + } + src += 2; + } + else { + dest[len++] = *src++; + } + } + result.length = len; + result.data[result.length] = 0; +} + + +FsmAp *VarDef::walk( ParseData *pd ) +{ + /* We enter into a new name scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Recurse on the expression. */ + FsmAp *rtnVal = joinOrLm->walk( pd ); + + /* Do the tranfer of local error actions. 
*/ + LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name ); + if ( localErrDictEl != 0 ) { + for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ ) + rtnVal->transferErrorActions( state, localErrDictEl->value ); + } + + /* If the expression below is a join operation with multiple expressions + * then it just had epsilon transisions resolved. If it is a join + * with only a single expression then run the epsilon op now. */ + if ( joinOrLm->type == JoinOrLm::JoinType && joinOrLm->join->exprList.length() == 1 ) + rtnVal->epsilonOp(); + + /* We can now unset entry points that are not longer used. */ + pd->unsetObsoleteEntries( rtnVal ); + + /* If the name of the variable is referenced then add the entry point to + * the graph. */ + if ( pd->curNameInst->numRefs > 0 ) + rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState ); + + /* Pop the name scope. */ + pd->popNameScope( nameFrame ); + return rtnVal; +} + +void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd ) +{ + /* The variable definition enters a new scope. */ + NameInst *prevNameInst = pd->curNameInst; + pd->curNameInst = pd->addNameInst( loc, name, false ); + + if ( joinOrLm->type == JoinOrLm::LongestMatchType ) + pd->curNameInst->isLongestMatch = true; + + /* Recurse. */ + joinOrLm->makeNameTree( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->curNameInst = prevNameInst; +} + +void VarDef::resolveNameRefs( ParseData *pd ) +{ + /* Entering into a new scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Recurse. */ + joinOrLm->resolveNameRefs( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->popNameScope( nameFrame ); +} + +InputLoc LongestMatchPart::getLoc() +{ + return action != 0 ? action->loc : semiLoc; +} + +/* + * If there are any LMs then all of the following entry points must reset + * tokstart: + * + * 1. fentry(StateRef) + * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef) + * 3. 
targt of any transition that has an fcall (the return loc). + * 4. start state of all longest match routines. + */ + +Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc, + const char *name, InlineList *inlineList ) +{ + Action *action = new Action( loc, name, inlineList, pd->nextCondId++ ); + action->actionRefs.append( pd->curNameInst ); + pd->actionList.append( action ); + action->isLmAction = true; + return action; +} + +void LongestMatch::makeActions( ParseData *pd ) +{ + /* Make actions that set the action id. */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, InlineItem::LmSetActId ) ); + char *actName = new char[50]; + sprintf( actName, "store%i", lmi->longestMatchId ); + lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart on the last character. */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmOnLast ) ); + char *actName = new char[50]; + sprintf( actName, "imm%i", lmi->longestMatchId ); + lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart on the next + * character. These actions will set tokend themselves (it is the current + * char). */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. 
We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmOnNext ) ); + char *actName = new char[50]; + sprintf( actName, "lagh%i", lmi->longestMatchId ); + lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart at tokend. These + * actions execute some time after matching the last char. */ + for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmOnLagBehind ) ); + char *actName = new char[50]; + sprintf( actName, "lag%i", lmi->longestMatchId ); + lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + InputLoc loc; + loc.line = 1; + loc.col = 1; + + /* Create the error action. */ + InlineList *il6 = new InlineList; + il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) ); + lmActSelect = newAction( pd, loc, "lagsel", il6 ); +} + +void LongestMatch::findName( ParseData *pd ) +{ + NameInst *nameInst = pd->curNameInst; + while ( nameInst->name == 0 ) { + nameInst = nameInst->parent; + /* Since every machine must must have a name, we should always find a + * name for the longest match. */ + assert( nameInst != 0 ); + } + name = nameInst->name; +} + +void LongestMatch::makeNameTree( ParseData *pd ) +{ + /* Create an anonymous scope for the longest match. Will be used for + * restarting machine after matching a token. */ + NameInst *prevNameInst = pd->curNameInst; + pd->curNameInst = pd->addNameInst( loc, 0, false ); + + /* Recurse into all parts of the longest match operator. 
/* Turn the union of the longest-match parts in `graph` into a scanner.
 *
 * Steps, in the order they appear below:
 *  1. Insert a null item into the item set of every state marked by a
 *     search from the start state.
 *  2. For every transition that carries a longest-match action, add that
 *     action's part to the item sets of the states marked by a search from
 *     the transition's target (only if the target has transitions out).
 *  3. Attach the token-start actions to the start state, plus the act
 *     initialisation when some marked state has more than one item.
 *  4. Attach the on-last, set-tokend and set-act-id actions to the
 *     transitions and collect the transitions that must restart.
 *  5. Redirect the collected transitions to the start state.
 *  6. Install error targets that run the pending match action and return
 *     to the start state.
 *  7. Make the start state final.
 *
 * NOTE(review): markReachableFromHereStopFinal() is defined elsewhere; going
 * by its name it marks (SB_ISMARKED) states reachable without continuing
 * past final states. Each search below clears the marks again before the
 * next one starts. */
void LongestMatch::runLonestMatch( ParseData *pd, FsmAp *graph )
{
	/* Seed: every state marked from the start gets the null item, meaning
	 * "nothing matched yet" is possible there. */
	graph->markReachableFromHereStopFinal( graph->startState );
	for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
		if ( ms->stateBits & SB_ISMARKED ) {
			ms->lmItemSet.insert( 0 );
			ms->stateBits &= ~ SB_ISMARKED;
		}
	}

	/* Transfer the first item of non-empty lmAction tables to the item sets
	 * of the states that follow. Exclude states that have no transitions out.
	 * This must happen on a separate pass so that on each iteration of the
	 * next pass we have the item set entries from all lmAction tables. */
	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
		for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
			if ( trans->lmActionTable.length() > 0 ) {
				/* Only the first element of the table is used. */
				LmActionTableEl *lmAct = trans->lmActionTable.data;
				StateAp *toState = trans->toState;
				assert( toState );

				/* Check if there are transitions out, this may be a very
				 * close approximation? Out transitions going nowhere?
				 * FIXME: Check. */
				if ( toState->outList.length() > 0 ) {
					/* Fill the item sets. */
					graph->markReachableFromHereStopFinal( toState );
					for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
						if ( ms->stateBits & SB_ISMARKED ) {
							ms->lmItemSet.insert( lmAct->value );
							ms->stateBits &= ~ SB_ISMARKED;
						}
					}
				}
			}
		}
	}

	/* The lmItem sets are now filled, telling us which longest match rules
	 * can succeed in which states. First determine if we need to make sure
	 * act is defaulted to zero. We need to do this if there are any states
	 * with lmItemSet.length() > 1 and NULL is included. That is, that the
	 * switch may get called when in fact nothing has been matched. */
	int maxItemSetLength = 0;
	graph->markReachableFromHereStopFinal( graph->startState );
	for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
		if ( ms->stateBits & SB_ISMARKED ) {
			if ( ms->lmItemSet.length() > maxItemSetLength )
				maxItemSetLength = ms->lmItemSet.length();
			ms->stateBits &= ~ SB_ISMARKED;
		}
	}

	/* The actions executed on starting to match a token. */
	graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
	graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
	if ( maxItemSetLength > 1 ) {
		/* The longest match action switch may be called when tokens are
		 * matched, in which case act must be initialized, there must be a
		 * case to handle the error, and the generated machine will require an
		 * error state. */
		lmSwitchHandlesError = true;
		pd->lmRequiresErrorState = true;
		graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
	}

	/* The place to store transitions to restart. It may be possible for the
	 * restarting to affect the searching through the graph that follows. For
	 * now take the safe route and save the list of transitions to restart
	 * until after all searching is done. */
	Vector<TransAp*> restartTrans;

	/* Set actions that do immediate token recognition, set the longest match part
	 * id and set the token ending. */
	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
		for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
			if ( trans->lmActionTable.length() > 0 ) {
				LmActionTableEl *lmAct = trans->lmActionTable.data;
				StateAp *toState = trans->toState;
				assert( toState );

				/* Check if there are transitions out, this may be a very
				 * close approximation? Out transitions going nowhere?
				 * FIXME: Check. */
				if ( toState->outList.length() == 0 ) {
					/* Can execute the immediate action for the longest match
					 * part. Redirect the action to the start state. */
					trans->actionTable.setAction( lmAct->key,
							lmAct->value->actOnLast );
					restartTrans.append( trans );
				}
				else {
					/* Look for non final states that have a non-empty item
					 * set. If these are present then we need to record the
					 * end of the token. Also find the highest item set
					 * length reachable from here (excluding at transitions to
					 * final states). */
					bool nonFinalNonEmptyItemSet = false;
					/* Reused here as the maximum for this transition only. */
					maxItemSetLength = 0;
					graph->markReachableFromHereStopFinal( toState );
					for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
						if ( ms->stateBits & SB_ISMARKED ) {
							if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
								nonFinalNonEmptyItemSet = true;
							if ( ms->lmItemSet.length() > maxItemSetLength )
								maxItemSetLength = ms->lmItemSet.length();
							ms->stateBits &= ~ SB_ISMARKED;
						}
					}

					/* If there are reachable states that are not final and
					 * have non empty item sets or that have an item set
					 * length greater than one then we need to set tokend
					 * because the error action that matches the token will
					 * require it. */
					if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
						trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );

					/* Some states may not know which longest match item to
					 * execute, must set it. */
					if ( maxItemSetLength > 1 ) {
						/* There are transitions out, another match may come. */
						trans->actionTable.setAction( lmAct->key,
								lmAct->value->setActId );
					}
				}
			}
		}
	}

	/* Now that all graph searching is done it is certainly safe to set the
	 * restarting. It may be safe above, however this must be verified. */
	for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
		restart( graph, *pt );

	/* One ordering value shared by all error actions embedded below. */
	int lmErrActionOrd = pd->curActionOrd++;

	/* Embed the error for recognizing a char. */
	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
		if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
			/* Exactly one real (non-null) part can be pending here. */
			if ( st->isFinState() ) {
				/* On error execute the actOnNext action, which knows that
				 * the last character of the token was one back and restart. */
				graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
						&st->lmItemSet[0]->actOnNext, 1 );
			}
			else {
				/* Not final: run the lag-behind action of the pending part. */
				graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
						&st->lmItemSet[0]->actLagBehind, 1 );
			}
		}
		else if ( st->lmItemSet.length() > 1 ) {
			/* Need to use the select. Take note of which items the select
			 * is needed for so only the necessary actions are included. */
			for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
				if ( *plmi != 0 )
					(*plmi)->inLmSelect = true;
			}
			/* On error, execute the action select and go to the start state. */
			graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
					&lmActSelect, 1 );
		}
	}

	/* Finally, the start state should be made final. */
	graph->setFinState( graph->startState );
}
*/ + pd->popNameScope( nameFrame ); + + delete[] parts; + return rtnVal; +} + +FsmAp *JoinOrLm::walk( ParseData *pd ) +{ + FsmAp *rtnVal = 0; + switch ( type ) { + case JoinType: + rtnVal = join->walk( pd ); + break; + case LongestMatchType: + rtnVal = longestMatch->walk( pd ); + break; + } + return rtnVal; +} + +void JoinOrLm::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case JoinType: + join->makeNameTree( pd ); + break; + case LongestMatchType: + longestMatch->makeNameTree( pd ); + break; + } +} + +void JoinOrLm::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case JoinType: + join->resolveNameRefs( pd ); + break; + case LongestMatchType: + longestMatch->resolveNameRefs( pd ); + break; + } +} + + +/* Construct with a location and the first expression. */ +Join::Join( const InputLoc &loc, Expression *expr ) +: + loc(loc) +{ + exprList.append( expr ); +} + +/* Construct with a location and the first expression. */ +Join::Join( Expression *expr ) +: + loc(loc) +{ + exprList.append( expr ); +} + +/* Walk an expression node. */ +FsmAp *Join::walk( ParseData *pd ) +{ + if ( exprList.length() > 1 ) + return walkJoin( pd ); + else + return exprList.head->walk( pd ); +} + +/* There is a list of expressions to join. */ +FsmAp *Join::walkJoin( ParseData *pd ) +{ + /* We enter into a new name scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Evaluate the machines. */ + FsmAp **fsms = new FsmAp*[exprList.length()]; + ExprList::Iter expr = exprList; + for ( int e = 0; e < exprList.length(); e++, expr++ ) + fsms[e] = expr->walk( pd ); + + /* Get the start and final names. Final is + * guaranteed to exist, start is not. */ + NameInst *startName = pd->curNameInst->start; + NameInst *finalName = pd->curNameInst->final; + + int startId = -1; + if ( startName != 0 ) { + /* Take note that there was an implicit link to the start machine. 
*/ + pd->localNameScope->referencedNames.append( startName ); + startId = startName->id; + } + + /* A final id of -1 indicates there is no epsilon that references the + * final state, therefor do not create one or set an entry point to it. */ + int finalId = -1; + if ( finalName->numRefs > 0 ) + finalId = finalName->id; + + /* Join machines 1 and up onto machine 0. */ + FsmAp *retFsm = fsms[0]; + retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 ); + + /* We can now unset entry points that are not longer used. */ + pd->unsetObsoleteEntries( retFsm ); + + /* Pop the name scope. */ + pd->popNameScope( nameFrame ); + + delete[] fsms; + return retFsm; +} + +void Join::makeNameTree( ParseData *pd ) +{ + if ( exprList.length() > 1 ) { + /* Create the new anonymous scope. */ + NameInst *prevNameInst = pd->curNameInst; + pd->curNameInst = pd->addNameInst( loc, 0, false ); + + /* Join scopes need an implicit "final" target. */ + pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final", + pd->nextNameId++, false ); + + /* Recurse into all expressions in the list. */ + for ( ExprList::Iter expr = exprList; expr.lte(); expr++ ) + expr->makeNameTree( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->curNameInst = prevNameInst; + } + else { + /* Recurse into the single expression. */ + exprList.head->makeNameTree( pd ); + } +} + + +void Join::resolveNameRefs( ParseData *pd ) +{ + /* Branch on whether or not there is to be a join. */ + if ( exprList.length() > 1 ) { + /* The variable definition enters a new scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* The join scope must contain a start label. */ + NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true ); + if ( resolved.length() > 0 ) { + /* Take the first. */ + pd->curNameInst->start = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references. 
*/ + error(loc) << "multiple start labels" << endl; + errorStateLabels( resolved ); + } + } + + /* Make sure there is a start label. */ + if ( pd->curNameInst->start != 0 ) { + /* There is an implicit reference to start name. */ + pd->curNameInst->start->numRefs += 1; + } + else { + /* No start label. Complain and recover by adding a label to the + * adding one. Recover ignoring the problem. */ + error(loc) << "no start label" << endl; + } + + /* Recurse into all expressions in the list. */ + for ( ExprList::Iter expr = exprList; expr.lte(); expr++ ) + expr->resolveNameRefs( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->popNameScope( nameFrame ); + } + else { + /* Recurse into the single expression. */ + exprList.head->resolveNameRefs( pd ); + } +} + +/* Clean up after an expression node. */ +Expression::~Expression() +{ + switch ( type ) { + case OrType: case IntersectType: case SubtractType: + case StrongSubtractType: + delete expression; + delete term; + break; + case TermType: + delete term; + break; + case BuiltinType: + break; + } +} + +/* Evaluate a single expression node. */ +FsmAp *Expression::walk( ParseData *pd, bool lastInSeq ) +{ + FsmAp *rtnVal = 0; + switch ( type ) { + case OrType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd, false ); + /* Evaluate the term. */ + FsmAp *rhs = term->walk( pd ); + /* Perform union. */ + rtnVal->unionOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case IntersectType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd ); + /* Evaluate the term. */ + FsmAp *rhs = term->walk( pd ); + /* Perform intersection. */ + rtnVal->intersectOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case SubtractType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd ); + /* Evaluate the term. */ + FsmAp *rhs = term->walk( pd ); + /* Perform subtraction. 
*/ + rtnVal->subtractOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case StrongSubtractType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd ); + + /* Evaluate the term and pad it with any* machines. */ + FsmAp *rhs = dotStarFsm( pd ); + FsmAp *termFsm = term->walk( pd ); + FsmAp *trailAnyStar = dotStarFsm( pd ); + rhs->concatOp( termFsm ); + rhs->concatOp( trailAnyStar ); + + /* Perform subtraction. */ + rtnVal->subtractOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case TermType: { + /* Return result of the term. */ + rtnVal = term->walk( pd ); + break; + } + case BuiltinType: { + /* Duplicate the builtin. */ + rtnVal = makeBuiltin( builtin, pd ); + break; + } + } + + return rtnVal; +} + +void Expression::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case OrType: + case IntersectType: + case SubtractType: + case StrongSubtractType: + expression->makeNameTree( pd ); + term->makeNameTree( pd ); + break; + case TermType: + term->makeNameTree( pd ); + break; + case BuiltinType: + break; + } +} + +void Expression::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case OrType: + case IntersectType: + case SubtractType: + case StrongSubtractType: + expression->resolveNameRefs( pd ); + term->resolveNameRefs( pd ); + break; + case TermType: + term->resolveNameRefs( pd ); + break; + case BuiltinType: + break; + } +} + +/* Clean up after a term node. */ +Term::~Term() +{ + switch ( type ) { + case ConcatType: + case RightStartType: + case RightFinishType: + case LeftType: + delete term; + delete factorWithAug; + break; + case FactorWithAugType: + delete factorWithAug; + break; + } +} + +/* Evaluate a term node. */ +FsmAp *Term::walk( ParseData *pd, bool lastInSeq ) +{ + FsmAp *rtnVal = 0; + switch ( type ) { + case ConcatType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd, false ); + /* Evaluate the FactorWithRep. */ + FsmAp *rhs = factorWithAug->walk( pd ); + /* Perform concatenation. 
*/ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case RightStartType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd ); + + /* Evaluate the FactorWithRep. */ + FsmAp *rhs = factorWithAug->walk( pd ); + + /* Set up the priority descriptors. The left machine gets the + * lower priority where as the right get the higher start priority. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 0; + rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* The start transitions right machine get the higher priority. + * Use the same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 1; + rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case RightFinishType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd ); + + /* Evaluate the FactorWithRep. */ + FsmAp *rhs = factorWithAug->walk( pd ); + + /* Set up the priority descriptors. The left machine gets the + * lower priority where as the finishing transitions to the right + * get the higher priority. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 0; + rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* The finishing transitions of the right machine get the higher + * priority. Use the same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 1; + rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case LeftType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd ); + + /* Evaluate the FactorWithRep. */ + FsmAp *rhs = factorWithAug->walk( pd ); + + /* Set up the priority descriptors. The left machine gets the + * higher priority. 
*/ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 1; + rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* The right machine gets the lower priority. Since + * startTransPrior might unnecessarily increase the number of + * states during the state machine construction process (due to + * isolation), we use allTransPrior instead, which has the same + * effect. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 0; + rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case FactorWithAugType: { + rtnVal = factorWithAug->walk( pd ); + break; + } + } + return rtnVal; +} + +void Term::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case ConcatType: + case RightStartType: + case RightFinishType: + case LeftType: + term->makeNameTree( pd ); + factorWithAug->makeNameTree( pd ); + break; + case FactorWithAugType: + factorWithAug->makeNameTree( pd ); + break; + } +} + +void Term::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case ConcatType: + case RightStartType: + case RightFinishType: + case LeftType: + term->resolveNameRefs( pd ); + factorWithAug->resolveNameRefs( pd ); + break; + case FactorWithAugType: + factorWithAug->resolveNameRefs( pd ); + break; + } +} + +/* Clean up after a factor with augmentation node. */ +FactorWithAug::~FactorWithAug() +{ + delete factorWithRep; + + /* Walk the vector of parser actions, deleting function names. */ + + /* Clean up priority descriptors. */ + if ( priorDescs != 0 ) + delete[] priorDescs; +} + +void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd ) +{ + /* Assign actions. */ + for ( int i = 0; i < actions.length(); i++ ) { + switch ( actions[i].type ) { + /* Transition actions. 
*/ + case at_start: + graph->startFsmAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all: + graph->allTransAction( actionOrd[i], actions[i].action ); + break; + case at_finish: + graph->finishFsmAction( actionOrd[i], actions[i].action ); + break; + case at_leave: + graph->leaveFsmAction( actionOrd[i], actions[i].action ); + break; + + /* Global error actions. */ + case at_start_gbl_error: + graph->startErrorAction( actionOrd[i], actions[i].action, 0 ); + afterOpMinimize( graph ); + break; + case at_all_gbl_error: + graph->allErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_final_gbl_error: + graph->finalErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_not_start_gbl_error: + graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_not_final_gbl_error: + graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_middle_gbl_error: + graph->middleErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + + /* Local error actions. */ + case at_start_local_error: + graph->startErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + afterOpMinimize( graph ); + break; + case at_all_local_error: + graph->allErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_final_local_error: + graph->finalErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_not_start_local_error: + graph->notStartErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_not_final_local_error: + graph->notFinalErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_middle_local_error: + graph->middleErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + + /* EOF actions. 
*/ + case at_start_eof: + graph->startEOFAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all_eof: + graph->allEOFAction( actionOrd[i], actions[i].action ); + break; + case at_final_eof: + graph->finalEOFAction( actionOrd[i], actions[i].action ); + break; + case at_not_start_eof: + graph->notStartEOFAction( actionOrd[i], actions[i].action ); + break; + case at_not_final_eof: + graph->notFinalEOFAction( actionOrd[i], actions[i].action ); + break; + case at_middle_eof: + graph->middleEOFAction( actionOrd[i], actions[i].action ); + break; + + /* To State Actions. */ + case at_start_to_state: + graph->startToStateAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all_to_state: + graph->allToStateAction( actionOrd[i], actions[i].action ); + break; + case at_final_to_state: + graph->finalToStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_start_to_state: + graph->notStartToStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_final_to_state: + graph->notFinalToStateAction( actionOrd[i], actions[i].action ); + break; + case at_middle_to_state: + graph->middleToStateAction( actionOrd[i], actions[i].action ); + break; + + /* From State Actions. */ + case at_start_from_state: + graph->startFromStateAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all_from_state: + graph->allFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_final_from_state: + graph->finalFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_start_from_state: + graph->notStartFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_final_from_state: + graph->notFinalFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_middle_from_state: + graph->middleFromStateAction( actionOrd[i], actions[i].action ); + break; + + /* Remaining cases, prevented by the parser. 
*/ + default: + assert( false ); + break; + } + } +} + +void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd ) +{ + /* Assign priorities. */ + for ( int i = 0; i < priorityAugs.length(); i++ ) { + switch ( priorityAugs[i].type ) { + case at_start: + graph->startFsmPrior( priorOrd[i], &priorDescs[i]); + /* Start fsm priorities are a special case that may require + * minimization afterwards. */ + afterOpMinimize( graph ); + break; + case at_all: + graph->allTransPrior( priorOrd[i], &priorDescs[i] ); + break; + case at_finish: + graph->finishFsmPrior( priorOrd[i], &priorDescs[i] ); + break; + case at_leave: + graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] ); + break; + + default: + /* Parser Prevents this case. */ + break; + } + } +} + +void FactorWithAug::assignConditions( FsmAp *graph ) +{ + for ( int i = 0; i < conditions.length(); i++ ) { + switch ( conditions[i].type ) { + /* Transition actions. */ + case at_start: + graph->startFsmCondition( conditions[i].action ); + afterOpMinimize( graph ); + break; + case at_all: + graph->allTransCondition( conditions[i].action ); + break; + case at_leave: + graph->leaveFsmCondition( conditions[i].action ); + break; + default: + break; + } + } +} + + +/* Evaluate a factor with augmentation node. */ +FsmAp *FactorWithAug::walk( ParseData *pd ) +{ + /* Enter into the scopes created for the labels. */ + NameFrame nameFrame = pd->enterNameScope( false, labels.length() ); + + /* Make the array of function orderings. */ + int *actionOrd = 0; + if ( actions.length() > 0 ) + actionOrd = new int[actions.length()]; + + /* First walk the list of actions, assigning order to all starting + * actions. 
*/ + for ( int i = 0; i < actions.length(); i++ ) { + if ( actions[i].type == at_start || + actions[i].type == at_start_gbl_error || + actions[i].type == at_start_local_error || + actions[i].type == at_start_to_state || + actions[i].type == at_start_from_state || + actions[i].type == at_start_eof ) + actionOrd[i] = pd->curActionOrd++; + } + + /* Evaluate the factor with repetition. */ + FsmAp *rtnVal = factorWithRep->walk( pd ); + + /* Compute the remaining action orderings. */ + for ( int i = 0; i < actions.length(); i++ ) { + if ( actions[i].type != at_start && + actions[i].type != at_start_gbl_error && + actions[i].type != at_start_local_error && + actions[i].type != at_start_to_state && + actions[i].type != at_start_from_state && + actions[i].type != at_start_eof ) + actionOrd[i] = pd->curActionOrd++; + } + + /* Embed conditions. */ + assignConditions( rtnVal ); + + /* Embed actions. */ + assignActions( pd, rtnVal , actionOrd ); + + /* Make the array of priority orderings. Orderings are local to this walk + * of the factor with augmentation. */ + int *priorOrd = 0; + if ( priorityAugs.length() > 0 ) + priorOrd = new int[priorityAugs.length()]; + + /* Walk all priorities, assigning the priority ordering. */ + for ( int i = 0; i < priorityAugs.length(); i++ ) + priorOrd[i] = pd->curPriorOrd++; + + /* If the priority descriptors have not been made, make them now. Make + * priority descriptors for each priority asignment that will be passed to + * the fsm. Used to keep track of the key, value and used bit. */ + if ( priorDescs == 0 && priorityAugs.length() > 0 ) { + priorDescs = new PriorDesc[priorityAugs.length()]; + for ( int i = 0; i < priorityAugs.length(); i++ ) { + /* Init the prior descriptor for the priority setting. */ + priorDescs[i].key = priorityAugs[i].priorKey; + priorDescs[i].priority = priorityAugs[i].priorValue; + } + } + + /* Assign priorities into the machine. */ + assignPriorities( rtnVal, priorOrd ); + + /* Assign epsilon transitions. 
*/ + for ( int e = 0; e < epsilonLinks.length(); e++ ) { + /* Get the name, which may not exist. If it doesn't then silently + * ignore it because an error has already been reported. */ + NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++]; + if ( epTarg != 0 ) { + /* Make the epsilon transitions. */ + rtnVal->epsilonTrans( epTarg->id ); + + /* Note that we have made a link to the name. */ + pd->localNameScope->referencedNames.append( epTarg ); + } + } + + /* Set entry points for labels. */ + if ( labels.length() > 0 ) { + /* Pop the names. */ + pd->resetNameScope( nameFrame ); + + /* Make labels that are referenced into entry points. */ + for ( int i = 0; i < labels.length(); i++ ) { + pd->enterNameScope( false, 1 ); + + /* Will always be found. */ + NameInst *name = pd->curNameInst; + + /* If the name is referenced then set the entry point. */ + if ( name->numRefs > 0 ) + rtnVal->setEntry( name->id, rtnVal->startState ); + } + + pd->popNameScope( nameFrame ); + } + + if ( priorOrd != 0 ) + delete[] priorOrd; + if ( actionOrd != 0 ) + delete[] actionOrd; + return rtnVal; +} + +void FactorWithAug::makeNameTree( ParseData *pd ) +{ + /* Add the labels to the tree of instantiated names. Each label + * makes a new scope. */ + NameInst *prevNameInst = pd->curNameInst; + for ( int i = 0; i < labels.length(); i++ ) + pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true ); + + /* Recurse, then pop the names. */ + factorWithRep->makeNameTree( pd ); + pd->curNameInst = prevNameInst; +} + + +void FactorWithAug::resolveNameRefs( ParseData *pd ) +{ + /* Enter into the name scope created by any labels. */ + NameFrame nameFrame = pd->enterNameScope( false, labels.length() ); + + /* Note action references. */ + for ( int i = 0; i < actions.length(); i++ ) + actions[i].action->actionRefs.append( pd->localNameScope ); + + /* Recurse first. IMPORTANT: we must do the exact same traversal as when + * the tree is constructed. 
*/ + factorWithRep->resolveNameRefs( pd ); + + /* Resolve epsilon transitions. */ + for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) { + /* Get the link. */ + EpsilonLink &link = epsilonLinks[ep]; + NameInst *resolvedName = 0; + + if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) { + /* Epsilon drawn to an implicit final state. An implicit final is + * only available in join operations. */ + resolvedName = pd->localNameScope->final; + } + else { + /* Do an search for the name. */ + NameSet resolved; + pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 ); + if ( resolved.length() > 0 ) { + /* Take the first one. */ + resolvedName = resolved[0]; + if ( resolved.length() > 1 ) { + /* Complain about the multiple references. */ + error(link.loc) << "state reference " << link.target << + " resolves to multiple entry points" << endl; + errorStateLabels( resolved ); + } + } + } + + /* This is tricky, we stuff resolved epsilon transitions into one long + * vector in the parse data structure. Since the name resolution and + * graph generation both do identical walks of the parse tree we + * should always find the link resolutions in the right place. */ + pd->epsilonResolvedLinks.append( resolvedName ); + + if ( resolvedName != 0 ) { + /* Found the name, bump of the reference count on it. */ + resolvedName->numRefs += 1; + } + else { + /* Complain, no recovery action, the epsilon op will ignore any + * epsilon transitions whose names did not resolve. */ + error(link.loc) << "could not resolve label " << link.target << endl; + } + } + + if ( labels.length() > 0 ) + pd->popNameScope( nameFrame ); +} + + +/* Clean up after a factor with repetition node. 
*/ +FactorWithRep::~FactorWithRep() +{ + switch ( type ) { + case StarType: case StarStarType: case OptionalType: case PlusType: + case ExactType: case MaxType: case MinType: case RangeType: + delete factorWithRep; + break; + case FactorWithNegType: + delete factorWithNeg; + break; + } +} + +/* Evaluate a factor with repetition node. */ +FsmAp *FactorWithRep::walk( ParseData *pd ) +{ + FsmAp *retFsm = 0; + + switch ( type ) { + case StarType: { + /* Evaluate the FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying kleene star to a machine that " + "accepts zero length word" << endl; + } + + /* Shift over the start action orders then do the kleene star. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + retFsm->starOp( ); + afterOpMinimize( retFsm ); + break; + } + case StarStarType: { + /* Evaluate the FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying kleene star to a machine that " + "accepts zero length word" << endl; + } + + /* Set up the prior descs. All gets priority one, whereas leaving gets + * priority zero. Make a unique key so that these priorities don't + * interfere with any priorities set by the user. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 1; + retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* Leaveing gets priority 0. Use same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 0; + retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Shift over the start action orders then do the kleene star. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + retFsm->starOp( ); + afterOpMinimize( retFsm ); + break; + } + case OptionalType: { + /* Make the null fsm. */ + FsmAp *nu = new FsmAp(); + nu->lambdaFsm( ); + + /* Evaluate the FactorWithRep. 
*/ + retFsm = factorWithRep->walk( pd ); + + /* Perform the question operator. */ + retFsm->unionOp( nu ); + afterOpMinimize( retFsm ); + break; + } + case PlusType: { + /* Evaluate the FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying plus operator to a machine that " + "accpets zero length word" << endl; + } + + /* Need a duplicated for the star end. */ + FsmAp *dup = new FsmAp( *retFsm ); + + /* The start func orders need to be shifted before doing the star. */ + pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd ); + + /* Star the duplicate. */ + dup->starOp( ); + afterOpMinimize( dup ); + + retFsm->concatOp( dup ); + afterOpMinimize( retFsm ); + break; + } + case ExactType: { + /* Get an int from the repetition amount. */ + if ( lowerRep == 0 ) { + /* No copies. Don't need to evaluate the factorWithRep. + * This Defeats the purpose so give a warning. */ + warning(loc) << "exactly zero repetitions results " + "in the null machine" << endl; + + retFsm = new FsmAp(); + retFsm->lambdaFsm(); + } + else { + /* Evaluate the first FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing the + * repetition. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + /* Do the repetition on the machine. Already guarded against n == 0 */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + } + break; + } + case MaxType: { + /* Get an int from the repetition amount. */ + if ( upperRep == 0 ) { + /* No copies. Don't need to evaluate the factorWithRep. + * This Defeats the purpose so give a warning. 
*/ + warning(loc) << "max zero repetitions results " + "in the null machine" << endl; + + retFsm = new FsmAp(); + retFsm->lambdaFsm(); + } + else { + /* Evaluate the first FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying max repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing the + * repetition. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + /* Do the repetition on the machine. Already guarded against n == 0 */ + retFsm->optionalRepeatOp( upperRep ); + afterOpMinimize( retFsm ); + } + break; + } + case MinType: { + /* Evaluate the repeated machine. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying min repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing the repetition + * and the kleene star. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + if ( lowerRep == 0 ) { + /* Acts just like a star op on the machine to return. */ + retFsm->starOp( ); + afterOpMinimize( retFsm ); + } + else { + /* Take a duplicate for the plus. */ + FsmAp *dup = new FsmAp( *retFsm ); + + /* Do repetition on the first half. */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + + /* Star the duplicate. */ + dup->starOp( ); + afterOpMinimize( dup ); + + /* Tak on the kleene star. */ + retFsm->concatOp( dup ); + afterOpMinimize( retFsm ); + } + break; + } + case RangeType: { + /* Check for bogus range. */ + if ( upperRep - lowerRep < 0 ) { + error(loc) << "invalid range repetition" << endl; + + /* Return null machine as recovery. */ + retFsm = new FsmAp(); + retFsm->lambdaFsm(); + } + else if ( lowerRep == 0 && upperRep == 0 ) { + /* No copies. Don't need to evaluate the factorWithRep. 
This + * defeats the purpose so give a warning. */ + warning(loc) << "zero to zero repetitions results " + "in the null machine" << endl; + + retFsm = new FsmAp(); + retFsm->lambdaFsm(); + } + else { + /* Now need to evaluate the repeated machine. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying range repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing both kinds + * of repetition. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + if ( lowerRep == 0 ) { + /* Just doing max repetition. Already guarded against n == 0. */ + retFsm->optionalRepeatOp( upperRep ); + afterOpMinimize( retFsm ); + } + else if ( lowerRep == upperRep ) { + /* Just doing exact repetition. Already guarded against n == 0. */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + } + else { + /* This is the case that 0 < lowerRep < upperRep. Take a + * duplicate for the optional repeat. */ + FsmAp *dup = new FsmAp( *retFsm ); + + /* Do repetition on the first half. */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + + /* Do optional repetition on the second half. */ + dup->optionalRepeatOp( upperRep - lowerRep ); + afterOpMinimize( dup ); + + /* Tak on the duplicate machine. */ + retFsm->concatOp( dup ); + afterOpMinimize( retFsm ); + } + } + break; + } + case FactorWithNegType: { + /* Evaluate the Factor. Pass it up. 
*/ + retFsm = factorWithNeg->walk( pd ); + break; + }} + return retFsm; +} + +void FactorWithRep::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case StarType: + case StarStarType: + case OptionalType: + case PlusType: + case ExactType: + case MaxType: + case MinType: + case RangeType: + factorWithRep->makeNameTree( pd ); + break; + case FactorWithNegType: + factorWithNeg->makeNameTree( pd ); + break; + } +} + +void FactorWithRep::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case StarType: + case StarStarType: + case OptionalType: + case PlusType: + case ExactType: + case MaxType: + case MinType: + case RangeType: + factorWithRep->resolveNameRefs( pd ); + break; + case FactorWithNegType: + factorWithNeg->resolveNameRefs( pd ); + break; + } +} + +/* Clean up after a factor with negation node. */ +FactorWithNeg::~FactorWithNeg() +{ + switch ( type ) { + case NegateType: + case CharNegateType: + delete factorWithNeg; + break; + case FactorType: + delete factor; + break; + } +} + +/* Evaluate a factor with negation node. */ +FsmAp *FactorWithNeg::walk( ParseData *pd ) +{ + FsmAp *retFsm = 0; + + switch ( type ) { + case NegateType: { + /* Evaluate the factorWithNeg. */ + FsmAp *toNegate = factorWithNeg->walk( pd ); + + /* Negation is subtract from dot-star. */ + retFsm = dotStarFsm( pd ); + retFsm->subtractOp( toNegate ); + afterOpMinimize( retFsm ); + break; + } + case CharNegateType: { + /* Evaluate the factorWithNeg. */ + FsmAp *toNegate = factorWithNeg->walk( pd ); + + /* CharNegation is subtract from dot. */ + retFsm = dotFsm( pd ); + retFsm->subtractOp( toNegate ); + afterOpMinimize( retFsm ); + break; + } + case FactorType: { + /* Evaluate the Factor. Pass it up. 
*/ + retFsm = factor->walk( pd ); + break; + }} + return retFsm; +} + +void FactorWithNeg::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case NegateType: + case CharNegateType: + factorWithNeg->makeNameTree( pd ); + break; + case FactorType: + factor->makeNameTree( pd ); + break; + } +} + +void FactorWithNeg::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case NegateType: + case CharNegateType: + factorWithNeg->resolveNameRefs( pd ); + break; + case FactorType: + factor->resolveNameRefs( pd ); + break; + } +} + +/* Clean up after a factor node. */ +Factor::~Factor() +{ + switch ( type ) { + case LiteralType: + delete literal; + break; + case RangeType: + delete range; + break; + case OrExprType: + delete reItem; + break; + case RegExprType: + delete regExpr; + break; + case ReferenceType: + break; + case ParenType: + delete join; + break; + case LongestMatchType: + delete longestMatch; + break; + } +} + +/* Evaluate a factor node. */ +FsmAp *Factor::walk( ParseData *pd ) +{ + FsmAp *rtnVal = 0; + switch ( type ) { + case LiteralType: + rtnVal = literal->walk( pd ); + break; + case RangeType: + rtnVal = range->walk( pd ); + break; + case OrExprType: + rtnVal = reItem->walk( pd, 0 ); + break; + case RegExprType: + rtnVal = regExpr->walk( pd, 0 ); + break; + case ReferenceType: + rtnVal = varDef->walk( pd ); + break; + case ParenType: + rtnVal = join->walk( pd ); + break; + case LongestMatchType: + rtnVal = longestMatch->walk( pd ); + break; + } + + return rtnVal; +} + +void Factor::makeNameTree( ParseData *pd ) +{ + switch ( type ) { + case LiteralType: + case RangeType: + case OrExprType: + case RegExprType: + break; + case ReferenceType: + varDef->makeNameTree( loc, pd ); + break; + case ParenType: + join->makeNameTree( pd ); + break; + case LongestMatchType: + longestMatch->makeNameTree( pd ); + break; + } +} + +void Factor::resolveNameRefs( ParseData *pd ) +{ + switch ( type ) { + case LiteralType: + case RangeType: + case OrExprType: + case 
RegExprType: + break; + case ReferenceType: + varDef->resolveNameRefs( pd ); + break; + case ParenType: + join->resolveNameRefs( pd ); + break; + case LongestMatchType: + longestMatch->resolveNameRefs( pd ); + break; + } +} + +/* Clean up a range object. Must delete the two literals. */ +Range::~Range() +{ + delete lowerLit; + delete upperLit; +} + +/* Evaluate a range. Gets the lower an upper key and makes an fsm range. */ +FsmAp *Range::walk( ParseData *pd ) +{ + /* Construct and verify the suitability of the lower end of the range. */ + FsmAp *lowerFsm = lowerLit->walk( pd ); + if ( !lowerFsm->checkSingleCharMachine() ) { + error(lowerLit->token.loc) << + "bad range lower end, must be a single character" << endl; + } + + /* Construct and verify the upper end. */ + FsmAp *upperFsm = upperLit->walk( pd ); + if ( !upperFsm->checkSingleCharMachine() ) { + error(upperLit->token.loc) << + "bad range upper end, must be a single character" << endl; + } + + /* Grab the keys from the machines, then delete them. */ + Key lowKey = lowerFsm->startState->outList.head->lowKey; + Key highKey = upperFsm->startState->outList.head->lowKey; + delete lowerFsm; + delete upperFsm; + + /* Validate the range. */ + if ( lowKey > highKey ) { + /* Recover by setting upper to lower; */ + error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl; + highKey = lowKey; + } + + /* Return the range now that it is validated. */ + FsmAp *retFsm = new FsmAp(); + retFsm->rangeFsm( lowKey, highKey ); + return retFsm; +} + +/* Evaluate a literal object. */ +FsmAp *Literal::walk( ParseData *pd ) +{ + /* FsmAp to return, is the alphabet signed. */ + FsmAp *rtnVal = 0; + + switch ( type ) { + case Number: { + /* Make the fsm key in int format. */ + Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd ); + /* Make the new machine. */ + rtnVal = new FsmAp(); + rtnVal->concatFsm( fsmKey ); + break; + } + case LitString: { + /* Make the array of keys in int format. 
*/ + Token interp; + bool caseInsensitive; + token.prepareLitString( interp, caseInsensitive ); + Key *arr = new Key[interp.length]; + makeFsmKeyArray( arr, interp.data, interp.length, pd ); + + /* Make the new machine. */ + rtnVal = new FsmAp(); + if ( caseInsensitive ) + rtnVal->concatFsmCI( arr, interp.length ); + else + rtnVal->concatFsm( arr, interp.length ); + delete[] interp.data; + delete[] arr; + break; + }} + return rtnVal; +} + +/* Clean up after a regular expression object. */ +RegExpr::~RegExpr() +{ + switch ( type ) { + case RecurseItem: + delete regExpr; + delete item; + break; + case Empty: + break; + } +} + +/* Evaluate a regular expression object. */ +FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex ) +{ + /* This is the root regex, pass down a pointer to this. */ + if ( rootRegex == 0 ) + rootRegex = this; + + FsmAp *rtnVal = 0; + switch ( type ) { + case RecurseItem: { + /* Walk both items. */ + rtnVal = regExpr->walk( pd, rootRegex ); + FsmAp *fsm2 = item->walk( pd, rootRegex ); + rtnVal->concatOp( fsm2 ); + break; + } + case Empty: { + rtnVal = new FsmAp(); + rtnVal->lambdaFsm(); + break; + } + } + return rtnVal; +} + +/* Clean up after an item in a regular expression. */ +ReItem::~ReItem() +{ + switch ( type ) { + case Data: + case Dot: + break; + case OrBlock: + case NegOrBlock: + delete orBlock; + break; + } +} + +/* Evaluate a regular expression object. */ +FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex ) +{ + /* The fsm to return, is the alphabet signed? */ + FsmAp *rtnVal = 0; + + switch ( type ) { + case Data: { + /* Move the data into an integer array and make a concat fsm. */ + Key *arr = new Key[token.length]; + makeFsmKeyArray( arr, token.data, token.length, pd ); + + /* Make the concat fsm. 
*/ + rtnVal = new FsmAp(); + if ( rootRegex != 0 && rootRegex->caseInsensitive ) + rtnVal->concatFsmCI( arr, token.length ); + else + rtnVal->concatFsm( arr, token.length ); + delete[] arr; + break; + } + case Dot: { + /* Make the dot fsm. */ + rtnVal = dotFsm( pd ); + break; + } + case OrBlock: { + /* Get the or block and minmize it. */ + rtnVal = orBlock->walk( pd, rootRegex ); + if ( rtnVal == 0 ) { + rtnVal = new FsmAp(); + rtnVal->lambdaFsm(); + } + rtnVal->minimizePartition2(); + break; + } + case NegOrBlock: { + /* Get the or block and minimize it. */ + FsmAp *fsm = orBlock->walk( pd, rootRegex ); + fsm->minimizePartition2(); + + /* Make a dot fsm and subtract from it. */ + rtnVal = dotFsm( pd ); + rtnVal->subtractOp( fsm ); + rtnVal->minimizePartition2(); + break; + } + } + + /* If the item is followed by a star, then apply the star op. */ + if ( star ) { + if ( rtnVal->startState->isFinState() ) { + warning(loc) << "applying kleene star to a machine that " + "accpets zero length word" << endl; + } + + rtnVal->starOp(); + rtnVal->minimizePartition2(); + } + return rtnVal; +} + +/* Clean up after an or block of a regular expression. */ +ReOrBlock::~ReOrBlock() +{ + switch ( type ) { + case RecurseItem: + delete orBlock; + delete item; + break; + case Empty: + break; + } +} + + +/* Evaluate an or block of a regular expression. */ +FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex ) +{ + FsmAp *rtnVal = 0; + switch ( type ) { + case RecurseItem: { + /* Evaluate the two fsm. */ + FsmAp *fsm1 = orBlock->walk( pd, rootRegex ); + FsmAp *fsm2 = item->walk( pd, rootRegex ); + if ( fsm1 == 0 ) + rtnVal = fsm2; + else { + fsm1->unionOp( fsm2 ); + rtnVal = fsm1; + } + break; + } + case Empty: { + rtnVal = 0; + break; + } + } + return rtnVal;; +} + +/* Evaluate an or block item of a regular expression. */ +FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex ) +{ + /* The return value, is the alphabet signed? 
*/ + FsmAp *rtnVal = 0; + switch ( type ) { + case Data: { + /* Make the or machine. */ + rtnVal = new FsmAp(); + + /* Put the or data into an array of ints. Note that we find unique + * keys. Duplicates are silently ignored. The alternative would be to + * issue warning or an error but since we can't with [a0-9a] or 'a' | + * 'a' don't bother here. */ + KeySet keySet; + makeFsmUniqueKeyArray( keySet, token.data, token.length, + rootRegex != 0 ? rootRegex->caseInsensitive : false, pd ); + + /* Run the or operator. */ + rtnVal->orFsm( keySet.data, keySet.length() ); + break; + } + case Range: { + /* Make the upper and lower keys. */ + Key lowKey = makeFsmKeyChar( lower, pd ); + Key highKey = makeFsmKeyChar( upper, pd ); + + /* Validate the range. */ + if ( lowKey > highKey ) { + /* Recover by setting upper to lower; */ + error(loc) << "lower end of range is greater then upper end" << endl; + highKey = lowKey; + } + + /* Make the range machine. */ + rtnVal = new FsmAp(); + rtnVal->rangeFsm( lowKey, highKey ); + + if ( rootRegex != 0 && rootRegex->caseInsensitive ) { + if ( lowKey <= 'Z' && 'A' <= highKey ) { + Key otherLow = lowKey < 'A' ? Key('A') : lowKey; + Key otherHigh = 'Z' < highKey ? Key('Z') : highKey; + + otherLow = 'a' + ( otherLow - 'A' ); + otherHigh = 'a' + ( otherHigh - 'A' ); + + FsmAp *otherRange = new FsmAp(); + otherRange->rangeFsm( otherLow, otherHigh ); + rtnVal->unionOp( otherRange ); + rtnVal->minimizePartition2(); + } + else if ( lowKey <= 'z' && 'a' <= highKey ) { + Key otherLow = lowKey < 'a' ? Key('a') : lowKey; + Key otherHigh = 'z' < highKey ? 
Key('z') : highKey; + + otherLow = 'A' + ( otherLow - 'a' ); + otherHigh = 'A' + ( otherHigh - 'a' ); + + FsmAp *otherRange = new FsmAp(); + otherRange->rangeFsm( otherLow, otherHigh ); + rtnVal->unionOp( otherRange ); + rtnVal->minimizePartition2(); + } + } + + break; + }} + return rtnVal; +} diff --git a/contrib/tools/ragel5/ragel/parsetree.h b/contrib/tools/ragel5/ragel/parsetree.h new file mode 100644 index 0000000000..4f398683a9 --- /dev/null +++ b/contrib/tools/ragel5/ragel/parsetree.h @@ -0,0 +1,755 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _PARSETREE_H +#define _PARSETREE_H + +#include "ragel.h" +#include "avlmap.h" +#include "bstmap.h" +#include "vector.h" +#include "dlist.h" + +struct NameInst; + +/* Types of builtin machines. */ +enum BuiltinMachine +{ + BT_Any, + BT_Ascii, + BT_Extend, + BT_Alpha, + BT_Digit, + BT_Alnum, + BT_Lower, + BT_Upper, + BT_Cntrl, + BT_Graph, + BT_Print, + BT_Punct, + BT_Space, + BT_Xdigit, + BT_Lambda, + BT_Empty +}; + + +struct ParseData; + +/* Leaf type. */ +struct Literal; + +/* Tree nodes. 
*/

struct Term;
struct FactorWithAug;
struct FactorWithRep;
struct FactorWithNeg;
struct Factor;
struct Expression;
struct Join;
struct JoinOrLm;
struct LongestMatch;
struct LongestMatchPart;
struct LmPartList;
struct Range;

/* Type of augmentation. Describes locations in the machine.
 *
 * After the four transition entries, each group lists the same six
 * positions (start, all, final, not_start, not_final, middle) in the same
 * order as StateAugType. */
enum AugType
{
	/* Transition actions/priorities. */
	at_start,
	at_all,
	at_finish,
	at_leave,

	/* Global error actions. */
	at_start_gbl_error,
	at_all_gbl_error,
	at_final_gbl_error,
	at_not_start_gbl_error,
	at_not_final_gbl_error,
	at_middle_gbl_error,

	/* Local error actions. */
	at_start_local_error,
	at_all_local_error,
	at_final_local_error,
	at_not_start_local_error,
	at_not_final_local_error,
	at_middle_local_error,

	/* To State Action embedding. */
	at_start_to_state,
	at_all_to_state,
	at_final_to_state,
	at_not_start_to_state,
	at_not_final_to_state,
	at_middle_to_state,

	/* From State Action embedding. */
	at_start_from_state,
	at_all_from_state,
	at_final_from_state,
	at_not_start_from_state,
	at_not_final_from_state,
	at_middle_from_state,

	/* EOF Action embedding. */
	at_start_eof,
	at_all_eof,
	at_final_eof,
	at_not_start_eof,
	at_not_final_eof,
	at_middle_eof
};

/* IMPORTANT: These must follow the same order as the state augs in AugType
 * since we will be using this to compose AugType. */
enum StateAugType
{
	sat_start = 0,
	sat_all,
	sat_final,
	sat_not_start,
	sat_not_final,
	sat_middle
};

struct Action;
struct PriorDesc;
struct RegExpr;
struct ReItem;
struct ReOrBlock;
struct ReOrItem;
struct ExplicitMachine;
struct InlineItem;
struct InlineList;

/* Reference to a named state. */
typedef Vector<char*> NameRef;
typedef Vector<NameRef*> NameRefList;
typedef Vector<NameInst*> NameTargList;

/* Structure for storing location of epsilon transitions.
*/
struct EpsilonLink
{
	/* Note: target is copied into the link, not referenced. */
	EpsilonLink( const InputLoc &loc, NameRef &target )
		: loc(loc), target(target) { }

	InputLoc loc;
	NameRef target;
};

/* A label: a location paired with the label text. The data pointer is
 * stored as given; no copy is made here. */
struct Label
{
	Label( const InputLoc &loc, char *data )
		: loc(loc), data(data) { }

	InputLoc loc;
	char *data;
};

/* Structure represents an action assigned to some FactorWithAug node. The
 * factor with aug will keep an array of these. */
struct ParserAction
{
	ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
		: loc(loc), type(type), localErrKey(localErrKey), action(action) { }

	InputLoc loc;
	AugType type;
	int localErrKey;
	Action *action;
};

/* A token: character data, its length, and its location in the input. */
struct Token
{
	char *data;
	int length;
	InputLoc loc;

	void prepareLitString( Token &result, bool &caseInsensitive );
	void append( const Token &other );
	void set(const char *str, int len );
};

/* Store the value and type of a priority augmentation. */
struct PriorityAug
{
	PriorityAug( AugType type, int priorKey, int priorValue ) :
		type(type), priorKey(priorKey), priorValue(priorValue) { }

	AugType type;
	int priorKey;
	int priorValue;
};

/*
 * A Variable Definition
 */
struct VarDef
{
	/* A new definition starts out not exported. */
	VarDef(const char *name, JoinOrLm *joinOrLm )
		: name(name), joinOrLm(joinOrLm), isExport(false) { }

	/* Parse tree traversal. */
	FsmAp *walk( ParseData *pd );
	void makeNameTree( const InputLoc &loc, ParseData *pd );
	void resolveNameRefs( ParseData *pd );

	const char *name;
	JoinOrLm *joinOrLm;
	bool isExport;
};


/*
 * LongestMatch
 *
 * Wherever possible the item match will execute on the character. If not
 * possible the item match will execute on a lookahead character and either
 * hold the current char (if one away) or backup.
 *
 * How to handle the problem of backing up over a buffer break?
+ * + * Don't want to use pending out transitions for embedding item match because + * the role of item match action is different: it may sometimes match on the + * final transition, or may match on a lookahead character. + * + * Don't want to invent a new operator just for this. So just trail action + * after machine, this means we can only use literal actions. + * + * The item action may + * + * What states of the machine will be final. The item actions that wrap around + * on the last character will go straight to the start state. + * + * Some transitions will be lookahead transitions, they will hold the current + * character. Crossing them with regular transitions must be restricted + * because it does not make sense. The transition cannot simultaneously hold + * and consume the current character. + */ +struct LongestMatchPart +{ + LongestMatchPart( Join *join, Action *action, + InputLoc &semiLoc, int longestMatchId ) + : + join(join), action(action), semiLoc(semiLoc), + longestMatchId(longestMatchId), inLmSelect(false) { } + + InputLoc getLoc(); + + Join *join; + Action *action; + InputLoc semiLoc; + + Action *setActId; + Action *actOnLast; + Action *actOnNext; + Action *actLagBehind; + int longestMatchId; + bool inLmSelect; + LongestMatch *longestMatch; + + LongestMatchPart *prev, *next; +}; + +/* Declare a new type so that ptreetypes.h need not include dlist.h. */ +struct LmPartList : DList<LongestMatchPart> {}; + +struct LongestMatch +{ + /* Construct with a list of joins */ + LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) : + loc(loc), longestMatchList(longestMatchList), name(0), + lmSwitchHandlesError(false) { } + + /* Tree traversal. 
*/ + FsmAp *walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + void runLonestMatch( ParseData *pd, FsmAp *graph ); + Action *newAction( ParseData *pd, const InputLoc &loc, const char *name, + InlineList *inlineList ); + void makeActions( ParseData *pd ); + void findName( ParseData *pd ); + void restart( FsmAp *graph, TransAp *trans ); + + InputLoc loc; + LmPartList *longestMatchList; + const char *name; + + Action *lmActSelect; + bool lmSwitchHandlesError; + + LongestMatch *next, *prev; +}; + + +/* List of Expressions. */ +typedef DList<Expression> ExprList; + +struct JoinOrLm +{ + enum Type { + JoinType, + LongestMatchType + }; + + JoinOrLm( Join *join ) : + join(join), type(JoinType) {} + JoinOrLm( LongestMatch *longestMatch ) : + longestMatch(longestMatch), type(LongestMatchType) {} + + FsmAp *walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + Join *join; + LongestMatch *longestMatch; + Type type; +}; + +/* + * Join + */ +struct Join +{ + /* Construct with the first expression. */ + Join( Expression *expr ); + Join( const InputLoc &loc, Expression *expr ); + + /* Tree traversal. */ + FsmAp *walk( ParseData *pd ); + FsmAp *walkJoin( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + /* Data. */ + InputLoc loc; + ExprList exprList; +}; + +/* + * Expression + */ +struct Expression +{ + enum Type { + OrType, + IntersectType, + SubtractType, + StrongSubtractType, + TermType, + BuiltinType + }; + + /* Construct with an expression on the left and a term on the right. */ + Expression( Expression *expression, Term *term, Type type ) : + expression(expression), term(term), + builtin(builtin), type(type), prev(this), next(this) { } + + /* Construct with only a term. 
*/ + Expression( Term *term ) : + expression(0), term(term), builtin(builtin), + type(TermType) , prev(this), next(this) { } + + /* Construct with a builtin type. */ + Expression( BuiltinMachine builtin ) : + expression(0), term(0), builtin(builtin), + type(BuiltinType), prev(this), next(this) { } + + ~Expression(); + + /* Tree traversal. */ + FsmAp *walk( ParseData *pd, bool lastInSeq = true ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + /* Node data. */ + Expression *expression; + Term *term; + BuiltinMachine builtin; + Type type; + + Expression *prev, *next; +}; + +/* + * Term + */ +struct Term +{ + enum Type { + ConcatType, + RightStartType, + RightFinishType, + LeftType, + FactorWithAugType + }; + + Term( Term *term, FactorWithAug *factorWithAug ) : + term(term), factorWithAug(factorWithAug), type(ConcatType) { } + + Term( Term *term, FactorWithAug *factorWithAug, Type type ) : + term(term), factorWithAug(factorWithAug), type(type) { } + + Term( FactorWithAug *factorWithAug ) : + term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { } + + ~Term(); + + FsmAp *walk( ParseData *pd, bool lastInSeq = true ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + Term *term; + FactorWithAug *factorWithAug; + Type type; + + /* Priority descriptor for RightFinish type. */ + PriorDesc priorDescs[2]; +}; + + +/* Third level of precedence. Augmenting nodes with actions and priorities. */ +struct FactorWithAug +{ + FactorWithAug( FactorWithRep *factorWithRep ) : + priorDescs(0), factorWithRep(factorWithRep) { } + ~FactorWithAug(); + + /* Tree traversal. */ + FsmAp *walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd ); + void assignPriorities( FsmAp *graph, int *priorOrd ); + + void assignConditions( FsmAp *graph ); + + /* Actions and priorities assigned to the factor node. 
*/ + Vector<ParserAction> actions; + Vector<PriorityAug> priorityAugs; + PriorDesc *priorDescs; + Vector<Label> labels; + Vector<EpsilonLink> epsilonLinks; + Vector<ParserAction> conditions; + + FactorWithRep *factorWithRep; +}; + +/* Fourth level of precedence. Trailing unary operators. Provide kleen star, + * optional and plus. */ +struct FactorWithRep +{ + enum Type { + StarType, + StarStarType, + OptionalType, + PlusType, + ExactType, + MaxType, + MinType, + RangeType, + FactorWithNegType + }; + + FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep, + int lowerRep, int upperRep, Type type ) : + loc(loc), factorWithRep(factorWithRep), + factorWithNeg(0), lowerRep(lowerRep), + upperRep(upperRep), type(type) { } + + FactorWithRep( FactorWithNeg *factorWithNeg ) + : factorWithNeg(factorWithNeg), type(FactorWithNegType) { } + + ~FactorWithRep(); + + /* Tree traversal. */ + FsmAp *walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + InputLoc loc; + FactorWithRep *factorWithRep; + FactorWithNeg *factorWithNeg; + int lowerRep, upperRep; + Type type; + + /* Priority descriptor for StarStar type. */ + PriorDesc priorDescs[2]; +}; + +/* Fifth level of precedence. Provides Negation. */ +struct FactorWithNeg +{ + enum Type { + NegateType, + CharNegateType, + FactorType + }; + + FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) : + loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { } + + FactorWithNeg( Factor *factor ) : + factorWithNeg(0), factor(factor), type(FactorType) { } + + ~FactorWithNeg(); + + /* Tree traversal. */ + FsmAp *walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + InputLoc loc; + FactorWithNeg *factorWithNeg; + Factor *factor; + Type type; +}; + +/* + * Factor + */ +struct Factor +{ + /* Language elements a factor node can be. 
*/ + enum Type { + LiteralType, + RangeType, + OrExprType, + RegExprType, + ReferenceType, + ParenType, + LongestMatchType, + }; + + /* Construct with a literal fsm. */ + Factor( Literal *literal ) : + literal(literal), type(LiteralType) { } + + /* Construct with a range. */ + Factor( Range *range ) : + range(range), type(RangeType) { } + + /* Construct with the or part of a regular expression. */ + Factor( ReItem *reItem ) : + reItem(reItem), type(OrExprType) { } + + /* Construct with a regular expression. */ + Factor( RegExpr *regExpr ) : + regExpr(regExpr), type(RegExprType) { } + + /* Construct with a reference to a var def. */ + Factor( const InputLoc &loc, VarDef *varDef ) : + loc(loc), varDef(varDef), type(ReferenceType) {} + + /* Construct with a parenthesized join. */ + Factor( Join *join ) : + join(join), type(ParenType) {} + + /* Construct with a longest match operator. */ + Factor( LongestMatch *longestMatch ) : + longestMatch(longestMatch), type(LongestMatchType) {} + + /* Cleanup. */ + ~Factor(); + + /* Tree traversal. */ + FsmAp *walk( ParseData *pd ); + void makeNameTree( ParseData *pd ); + void resolveNameRefs( ParseData *pd ); + + InputLoc loc; + Literal *literal; + Range *range; + ReItem *reItem; + RegExpr *regExpr; + VarDef *varDef; + Join *join; + LongestMatch *longestMatch; + int lower, upper; + Type type; +}; + +/* A range machine. Only ever composed of two literals. */ +struct Range +{ + Range( Literal *lowerLit, Literal *upperLit ) + : lowerLit(lowerLit), upperLit(upperLit) { } + + ~Range(); + FsmAp *walk( ParseData *pd ); + + Literal *lowerLit; + Literal *upperLit; +}; + +/* Some literal machine. Can be a number or literal string. */ +struct Literal +{ + enum LiteralType { Number, LitString }; + + Literal( const Token &token, LiteralType type ) + : token(token), type(type) { } + + FsmAp *walk( ParseData *pd ); + + Token token; + LiteralType type; +}; + +/* Regular expression. 
*/
struct RegExpr
{
	enum RegExpType { RecurseItem, Empty };

	/* Constructors. The default form builds an Empty node and leaves the
	 * child pointers unset; the two-argument form builds a RecurseItem. */
	RegExpr() :
		type(Empty), caseInsensitive(false) { }
	RegExpr(RegExpr *regExpr, ReItem *item) :
		regExpr(regExpr), item(item),
		type(RecurseItem), caseInsensitive(false) { }

	~RegExpr();
	FsmAp *walk( ParseData *pd, RegExpr *rootRegex );

	RegExpr *regExpr;
	ReItem *item;
	RegExpType type;
	bool caseInsensitive;
};

/* An item in a regular expression. */
struct ReItem
{
	enum ReItemType { Data, Dot, OrBlock, NegOrBlock };

	/* Data item built from a token. */
	ReItem( const InputLoc &loc, const Token &token )
		: loc(loc), token(token), star(false), type(Data) { }
	/* Item with no payload; orBlock is left unset. */
	ReItem( const InputLoc &loc, ReItemType type )
		: loc(loc), star(false), type(type) { }
	/* Item carrying an or block. */
	ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
		: loc(loc), orBlock(orBlock), star(false), type(type) { }

	~ReItem();
	FsmAp *walk( ParseData *pd, RegExpr *rootRegex );

	InputLoc loc;
	Token token;
	ReOrBlock *orBlock;
	/* Starts false in every constructor; set elsewhere. */
	bool star;
	ReItemType type;
};

/* An or block item. */
struct ReOrBlock
{
	enum ReOrBlockType { RecurseItem, Empty };

	/* Constructors. The default form builds an Empty block and leaves the
	 * child pointers unset. */
	ReOrBlock()
		: type(Empty) { }
	ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
		: orBlock(orBlock), item(item), type(RecurseItem) { }

	~ReOrBlock();
	FsmAp *walk( ParseData *pd, RegExpr *rootRegex );

	ReOrBlock *orBlock;
	ReOrItem *item;
	ReOrBlockType type;
};

/* An item in an or block.
*/
struct ReOrItem
{
	enum ReOrItemType { Data, Range };

	/* Data item built from a token; lower and upper are left unset. */
	ReOrItem( const InputLoc &loc, const Token &token )
		: loc(loc), token(token), type(Data) {}
	/* Range item from lower to upper. */
	ReOrItem( const InputLoc &loc, char lower, char upper )
		: loc(loc), lower(lower), upper(upper), type(Range) { }

	FsmAp *walk( ParseData *pd, RegExpr *rootRegex );

	InputLoc loc;
	Token token;
	char lower;
	char upper;
	ReOrItemType type;
};


/*
 * Inline code tree
 */
struct InlineList;
struct InlineItem
{
	enum Type
	{
		Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
		Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
		LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
		LmInitTokStart, LmSetTokStart, Break
	};

	/* Each constructor sets data, nameRef and children (to 0 unless given);
	 * nameTarg, longestMatch and longestMatchPart are set only by the
	 * constructor that takes them. */

	/* Item carrying raw text data. */
	InlineItem( const InputLoc &loc, char *data, Type type ) :
		loc(loc), data(data), nameRef(0), children(0), type(type) { }

	/* Item carrying a name reference. */
	InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
		loc(loc), data(0), nameRef(nameRef), children(0), type(type) { }

	/* Item tied to a longest match and one of its parts. */
	InlineItem( const InputLoc &loc, LongestMatch *longestMatch,
		LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0),
		nameRef(0), children(0), longestMatch(longestMatch),
		longestMatchPart(longestMatchPart), type(type) { }

	/* Item carrying a name target. */
	InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
		loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
		type(type) { }

	/* Item with no payload. */
	InlineItem( const InputLoc &loc, Type type ) :
		loc(loc), data(0), nameRef(0), children(0), type(type) { }

	InputLoc loc;
	char *data;
	NameRef *nameRef;
	NameInst *nameTarg;
	InlineList *children;
	LongestMatch *longestMatch;
	LongestMatchPart *longestMatchPart;
	Type type;

	InlineItem *prev, *next;
};

/* Normally this would be a typedef, but that would entail including DList from
 * ptreetypes, which should be just typedef forwards.
*/
struct InlineList : public DList<InlineItem> { };



#endif /* _PARSETREE_H */
diff --git a/contrib/tools/ragel5/ragel/ragel.h b/contrib/tools/ragel5/ragel/ragel.h
new file mode 100644
index 0000000000..736369c0ce
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/ragel.h
@@ -0,0 +1,74 @@
/*
 *  Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
 */

/*  This file is part of Ragel.
 *
 *  Ragel is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  Ragel is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with Ragel; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifndef _RAGEL_H
#define _RAGEL_H

#include <stdio.h>
#include <iostream>
#include <fstream>
#include <string>
#include "config.h"

#define PROGNAME "ragel"

/* To what degree are machine minimized. */
enum MinimizeLevel {
	MinimizeApprox,
	MinimizeStable,
	MinimizePartition1,
	MinimizePartition2
};

/* When minimization is applied (going by the enumerator names: never, at
 * the end, after most operations, after every operation -- confirm against
 * the users of minimizeOpt). */
enum MinimizeOpt {
	MinimizeNone,
	MinimizeEnd,
	MinimizeMostOps,
	MinimizeEveryOp
};

/* Options. Declared here, defined elsewhere. */
extern MinimizeLevel minimizeLevel;
extern MinimizeOpt minimizeOpt;
extern char *machineSpec, *machineName;
extern bool printStatistics;

extern int gblErrorCount;
extern char mainMachine[];

/* Location in an input file. */
struct InputLoc
{
	const char *fileName;
	int line;
	int col;
};

/* Error reporting.
*/ +std::ostream &error(); +std::ostream &error( const InputLoc &loc ); +std::ostream &warning( const InputLoc &loc ); + +void terminateAllParsers( ); +void writeMachines( std::ostream &out, std::string hostData, const char *inputFileName ); +void xmlEscapeHost( std::ostream &out, char *data, int len ); + +#endif /* _RAGEL_H */ diff --git a/contrib/tools/ragel5/ragel/rlparse.cpp b/contrib/tools/ragel5/ragel/rlparse.cpp new file mode 100644 index 0000000000..cd6fbde218 --- /dev/null +++ b/contrib/tools/ragel5/ragel/rlparse.cpp @@ -0,0 +1,6088 @@ +/* Automatically generated by Kelbt from "rlparse.kl". + * + * Parts of this file are copied from Kelbt source covered by the GNU + * GPL. As a special exception, you may use the parts of this file copied + * from Kelbt source without restriction. The remainder is derived from + * "rlparse.kl" and inherits the copyright status of that file. + */ + +#line 1 "rlparse.kl" +/* + * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "rlparse.h" +#include "ragel.h" +#include <iostream> +#include <errno.h> + +#include <stdlib.h> +//#include <malloc.h> + +using std::cout; +using std::cerr; +using std::endl; + +ParserDict parserDict; + +#line 93 "rlparse.kh" +#line 96 "rlparse.kh" +#line 126 "rlparse.kh" +#line 1370 "rlparse.kl" + + +#line 50 "rlparse.cpp" +struct Parser_Lel_action_ref +{ +#line 682 "rlparse.kl" + + Action *action; + + +#line 57 "rlparse.cpp" +}; + +struct Parser_Lel_aug_type +{ +#line 475 "rlparse.kl" + + InputLoc loc; + AugType augType; + + +#line 68 "rlparse.cpp" +}; + +struct Parser_Lel_expression +{ +#line 297 "rlparse.kl" + + Expression *expression; + + +#line 78 "rlparse.cpp" +}; + +struct Parser_Lel_factor +{ +#line 907 "rlparse.kl" + + Factor *factor; + + +#line 88 "rlparse.cpp" +}; + +struct Parser_Lel_factor_rep_num +{ +#line 861 "rlparse.kl" + + int rep; + + +#line 98 "rlparse.cpp" +}; + +struct Parser_Lel_factor_with_aug +{ +#line 392 "rlparse.kl" + + FactorWithAug *factorWithAug; + + +#line 108 "rlparse.cpp" +}; + +struct Parser_Lel_factor_with_ep +{ +#line 376 "rlparse.kl" + + FactorWithAug *factorWithAug; + + +#line 118 "rlparse.cpp" +}; + +struct Parser_Lel_factor_with_label +{ +#line 360 "rlparse.kl" + + FactorWithAug *factorWithAug; + + +#line 128 "rlparse.cpp" +}; + +struct Parser_Lel_factor_with_neg +{ +#line 887 "rlparse.kl" + + FactorWithNeg *factorWithNeg; + + +#line 138 "rlparse.cpp" +}; + +struct Parser_Lel_factor_with_rep +{ +#line 811 "rlparse.kl" + + FactorWithRep *factorWithRep; + + +#line 148 "rlparse.cpp" +}; + +struct Parser_Lel_inline_item +{ +#line 1160 "rlparse.kl" + + InlineItem *inlineItem; + + +#line 158 "rlparse.cpp" +}; + +struct Parser_Lel_inline_list +{ +#line 1139 "rlparse.kl" + + InlineList *inlineList; + + +#line 
168 "rlparse.cpp" +}; + +struct Parser_Lel_join +{ +#line 281 "rlparse.kl" + + Join *join; + + +#line 178 "rlparse.cpp" +}; + +struct Parser_Lel_join_or_lm +{ +#line 204 "rlparse.kl" + + JoinOrLm *joinOrLm; + + +#line 188 "rlparse.cpp" +}; + +struct Parser_Lel_lm_part_list +{ +#line 224 "rlparse.kl" + + LmPartList *lmPartList; + + +#line 198 "rlparse.cpp" +}; + +struct Parser_Lel_local_err_name +{ +#line 790 "rlparse.kl" + + int error_name; + + +#line 208 "rlparse.cpp" +}; + +struct Parser_Lel_longest_match_part +{ +#line 243 "rlparse.kl" + + LongestMatchPart *lmPart; + + +#line 218 "rlparse.cpp" +}; + +struct Parser_Lel_opt_export +{ +#line 64 "rlparse.kl" + + bool isSet; + + +#line 228 "rlparse.cpp" +}; + +struct Parser_Lel_opt_lm_part_action +{ +#line 262 "rlparse.kl" + + Action *action; + + +#line 238 "rlparse.cpp" +}; + +struct Parser_Lel_priority_aug +{ +#line 741 "rlparse.kl" + + int priorityNum; + + +#line 248 "rlparse.cpp" +}; + +struct Parser_Lel_priority_name +{ +#line 723 "rlparse.kl" + + int priorityName; + + +#line 258 "rlparse.cpp" +}; + +struct Parser_Lel_range_lit +{ +#line 975 "rlparse.kl" + + Literal *literal; + + +#line 268 "rlparse.cpp" +}; + +struct Parser_Lel_regular_expr +{ +#line 1013 "rlparse.kl" + + RegExpr *regExpr; + + +#line 278 "rlparse.cpp" +}; + +struct Parser_Lel_regular_expr_char +{ +#line 1062 "rlparse.kl" + + ReItem *reItem; + + +#line 288 "rlparse.cpp" +}; + +struct Parser_Lel_regular_expr_item +{ +#line 1046 "rlparse.kl" + + ReItem *reItem; + + +#line 298 "rlparse.cpp" +}; + +struct Parser_Lel_regular_expr_or_char +{ +#line 1121 "rlparse.kl" + + ReOrItem *reOrItem; + + +#line 308 "rlparse.cpp" +}; + +struct Parser_Lel_regular_expr_or_data +{ +#line 1088 "rlparse.kl" + + ReOrBlock *reOrBlock; + + +#line 318 "rlparse.cpp" +}; + +struct Parser_Lel_term +{ +#line 329 "rlparse.kl" + + Term *term; + + +#line 328 "rlparse.cpp" +}; + +struct Parser_Lel_token_type +{ +#line 104 "rlparse.kl" + + Token token; + + +#line 338 "rlparse.cpp" 
+}; + +union Parser_UserData +{ + struct Parser_Lel_action_ref action_ref; + struct Parser_Lel_aug_type aug_type; + struct Parser_Lel_expression expression; + struct Parser_Lel_factor factor; + struct Parser_Lel_factor_rep_num factor_rep_num; + struct Parser_Lel_factor_with_aug factor_with_aug; + struct Parser_Lel_factor_with_ep factor_with_ep; + struct Parser_Lel_factor_with_label factor_with_label; + struct Parser_Lel_factor_with_neg factor_with_neg; + struct Parser_Lel_factor_with_rep factor_with_rep; + struct Parser_Lel_inline_item inline_item; + struct Parser_Lel_inline_list inline_list; + struct Parser_Lel_join join; + struct Parser_Lel_join_or_lm join_or_lm; + struct Parser_Lel_lm_part_list lm_part_list; + struct Parser_Lel_local_err_name local_err_name; + struct Parser_Lel_longest_match_part longest_match_part; + struct Parser_Lel_opt_export opt_export; + struct Parser_Lel_opt_lm_part_action opt_lm_part_action; + struct Parser_Lel_priority_aug priority_aug; + struct Parser_Lel_priority_name priority_name; + struct Parser_Lel_range_lit range_lit; + struct Parser_Lel_regular_expr regular_expr; + struct Parser_Lel_regular_expr_char regular_expr_char; + struct Parser_Lel_regular_expr_item regular_expr_item; + struct Parser_Lel_regular_expr_or_char regular_expr_or_char; + struct Parser_Lel_regular_expr_or_data regular_expr_or_data; + struct Parser_Lel_term term; + struct Parser_Lel_token_type token_type; + struct Token token; +}; + +struct Parser_LangEl +{ + char *file; + int line; + int type; + int reduction; + int state; + union Parser_UserData user; + unsigned int retry; + struct Parser_LangEl *next, *child; +}; + +#line 388 "rlparse.cpp" +unsigned int Parser_startState = 0; + +short Parser_indicies[] = { + 151, -1, -1, -1, -1, -1, 151, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 
-1, -1, -1, -1, + -1, -1, 151, 151, 151, 151, -1, -1, + -1, -1, -1, -1, 151, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 151, 151, -1, 151, 1, 0, 393, + 153, -1, -1, -1, -1, -1, 153, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 153, 153, 153, 153, -1, -1, + -1, -1, -1, -1, 153, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 153, 153, -1, 149, -1, -1, 2, + 157, -1, -1, -1, -1, -1, 150, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 4, 5, 6, 7, -1, -1, + -1, -1, -1, -1, 154, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 9, 8, -1, -1, -1, -1, -1, + 152, 384, 385, 386, 387, 388, 389, 390, + 391, 392, 10, 3, 161, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 24, 11, 12, 14, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 318, 320, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 
-1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 13, 356, 356, 356, -1, 356, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 356, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 356, + -1, -1, -1, -1, -1, -1, 356, 356, + -1, -1, -1, -1, -1, -1, -1, -1, + 356, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 356, 356, 356, + 356, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 356, + 356, -1, -1, -1, 356, 356, 356, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 20, 356, 356, 356, -1, 356, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 356, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 356, -1, -1, + -1, -1, -1, -1, 356, 356, -1, -1, + -1, -1, -1, -1, -1, -1, 356, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 356, 356, 356, 356, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 
-1, -1, 356, 356, -1, + -1, -1, 356, 356, 356, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 22, 170, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 170, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 21, 23, -1, -1, -1, -1, -1, + -1, -1, -1, 155, 25, 164, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 26, 14, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 318, 320, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 27, 319, + 368, 369, 370, -1, 367, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 166, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 
-1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 366, -1, -1, -1, + -1, -1, -1, 364, 365, -1, -1, -1, + -1, -1, -1, -1, -1, 371, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 360, 361, 362, 363, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 372, 373, -1, -1, + -1, 374, 375, 28, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 357, -1, 359, -1, 355, 358, + 29, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 169, 368, + 369, 370, -1, 367, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 167, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 366, -1, -1, -1, -1, + -1, -1, 364, 365, -1, -1, -1, -1, + -1, -1, -1, -1, 371, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 360, 361, 362, 363, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 372, 373, -1, -1, -1, + 374, 375, 28, 
-1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 357, -1, 359, -1, 355, 358, 153, + -1, -1, -1, -1, -1, -1, 153, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 153, 153, 153, 153, -1, -1, -1, -1, + -1, -1, 153, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 153, + 153, -1, -1, -1, -1, 30, 31, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 32, 334, 334, 334, -1, 334, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 334, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 334, -1, -1, -1, -1, -1, -1, 334, + -1, -1, -1, -1, -1, -1, 334, 334, + -1, -1, -1, -1, -1, -1, -1, -1, + 334, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 334, 334, 334, + 334, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 334, 334, 334, 334, + 334, 334, 334, 334, 334, 334, 334, 334, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 33, 163, + 165, 34, 356, 356, 356, -1, 356, 
-1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 356, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 356, -1, + -1, -1, -1, -1, -1, 356, 356, -1, + -1, -1, -1, -1, -1, -1, -1, 356, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 356, 356, 356, 356, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 356, 356, + -1, -1, -1, 356, 356, 356, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 35, + 158, -1, -1, -1, -1, -1, -1, 157, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 4, 5, 6, 7, -1, -1, -1, + -1, -1, -1, 154, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 9, 8, -1, -1, -1, -1, -1, 152, + 384, 385, 386, 387, 388, 389, 390, 391, + 392, 10, 3, 44, -1, -1, -1, -1, + -1, -1, 52, -1, -1, -1, -1, 14, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 45, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 39, 46, + -1, -1, -1, -1, -1, 318, 320, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 
-1, -1, -1, -1, 50, 48, 49, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 36, -1, -1, 47, -1, + -1, -1, -1, -1, -1, -1, 37, 38, + 193, 41, -1, 42, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 43, -1, + -1, -1, 300, 304, -1, -1, 51, 44, + -1, -1, -1, -1, -1, -1, 52, -1, + -1, -1, -1, 14, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 45, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 39, 46, -1, -1, -1, -1, + -1, 318, 320, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 55, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 50, 48, 49, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 54, + 53, -1, 47, -1, -1, -1, -1, -1, + -1, -1, 37, 38, 193, 41, -1, 42, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 43, -1, -1, -1, 300, 304, + -1, -1, 51, 340, 341, 342, -1, 338, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 339, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 162, -1, -1, -1, -1, -1, -1, 366, + -1, -1, -1, -1, -1, -1, 364, 365, + -1, -1, -1, -1, -1, -1, -1, -1, + 343, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 
-1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 360, 361, 362, + 363, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 62, 57, 56, 372, + 373, 58, 60, 61, 374, 375, 28, 59, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 333, 337, 335, 336, 344, + 381, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 380, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 63, -1, -1, -1, -1, 64, 368, + 369, 370, -1, 367, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 168, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 366, -1, -1, -1, -1, + -1, -1, 364, 365, -1, -1, -1, -1, + -1, -1, -1, -1, 371, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 360, 361, 362, 363, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 372, 373, -1, -1, -1, + 374, 375, 28, -1, -1, -1, -1, -1, + -1, 
-1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 357, -1, 359, -1, 355, 358, 70, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 159, 72, + -1, -1, 182, -1, -1, 182, 73, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 182, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 182, 71, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 182, -1, -1, -1, + 74, 44, -1, -1, -1, -1, 187, -1, + 52, 187, -1, -1, 187, 19, 75, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 187, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 45, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 187, 187, -1, -1, -1, + -1, -1, -1, -1, 39, 46, -1, -1, + -1, -1, -1, 318, 320, -1, -1, 76, + 77, 78, -1, 187, -1, -1, -1, 187, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 50, 48, 49, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 47, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 188, 41, + -1, 42, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 43, -1, -1, -1, + 300, 304, -1, -1, 51, 307, -1, -1, + 307, 307, 307, -1, 307, 307, 307, 307, + 307, 307, 307, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 66, 
307, + 307, -1, 307, 307, 307, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 307, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 307, + 307, -1, -1, -1, -1, -1, -1, -1, + 307, 307, -1, -1, -1, -1, -1, 307, + 307, -1, -1, 307, 307, 307, 307, 307, + 307, -1, -1, 307, 307, 307, 307, 307, + 307, 307, 307, 307, 307, 307, 307, 307, + 307, 307, 307, 307, 307, 307, 307, 307, + 307, 307, 307, 307, 307, 307, 307, 307, + 307, 307, 307, 307, 307, 307, 307, 307, + 307, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 307, 195, + -1, -1, -1, -1, 195, -1, 195, 195, + -1, -1, 195, 195, 195, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 195, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 195, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 195, 195, -1, -1, -1, -1, -1, + -1, -1, 195, 195, -1, -1, -1, -1, + -1, 195, 195, -1, -1, 195, 195, 195, + 79, 195, -1, -1, -1, 195, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 195, 195, 195, 197, -1, -1, 89, 88, + 197, -1, 197, 197, -1, -1, 197, 197, + 197, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 197, 91, -1, + 90, -1, 87, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 197, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 197, 197, -1, + -1, -1, -1, -1, -1, -1, 197, 197, + -1, -1, -1, -1, -1, 197, 197, -1, + -1, 197, 197, 197, 197, 197, -1, -1, + -1, 197, 213, 215, 217, 92, 256, 260, + 262, 264, 258, 266, 268, 272, 274, 276, + 270, 278, 244, 248, 250, 252, 246, 254, + 
220, 224, 226, 228, 222, 230, 232, 236, + 238, 240, 234, 242, 197, 197, 197, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 219, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 80, -1, -1, 81, + 82, 83, 84, 85, 86, 208, -1, -1, + 208, 208, 208, -1, 208, 208, 292, 295, + 208, 208, 208, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 208, + 208, -1, 208, 294, 208, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 208, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 93, + 208, -1, -1, -1, -1, -1, -1, -1, + 208, 208, -1, -1, -1, -1, -1, 208, + 208, -1, -1, 208, 208, 208, 208, 208, + 293, -1, -1, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, + 208, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 208, 44, + -1, -1, -1, -1, -1, -1, 52, -1, + -1, -1, -1, 14, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 45, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 308, 46, -1, -1, -1, -1, + -1, 318, 320, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 50, 48, 49, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 
-1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 47, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 302, 304, + -1, -1, 51, 44, -1, -1, -1, -1, + -1, -1, 52, -1, -1, -1, -1, 14, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 45, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 308, 46, + -1, -1, -1, -1, -1, 318, 320, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 50, 48, 49, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 47, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 303, 304, -1, -1, 51, 305, + -1, -1, 305, 305, 305, -1, 305, 305, + 305, 305, 305, 305, 305, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 305, 305, -1, 305, 305, 305, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 305, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 305, 305, -1, -1, -1, -1, -1, + -1, -1, 305, 305, -1, -1, -1, -1, + -1, 305, 305, -1, 314, 305, 305, 305, + 305, 305, 305, -1, -1, 305, 305, 305, + 305, 305, 305, 305, 305, 305, 305, 305, + 305, 305, 305, 305, 305, 305, 305, 305, + 305, 305, 305, 305, 305, 305, 305, 305, + 305, 305, 305, 305, 305, 305, 305, 305, 
+ 305, 305, 305, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 305, 306, -1, -1, 306, 306, 306, -1, + 306, 306, 306, 306, 306, 306, 306, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 306, 306, -1, 306, 306, + 306, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 306, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 306, 306, -1, -1, -1, + -1, -1, -1, -1, 306, 306, -1, -1, + -1, -1, -1, 306, 306, -1, 316, 306, + 306, 306, 306, 306, 306, -1, -1, 306, + 306, 306, 306, 306, 306, 306, 306, 306, + 306, 306, 306, 306, 306, 306, 306, 306, + 306, 306, 306, 306, 306, 306, 306, 306, + 306, 306, 306, 306, 306, 306, 306, 306, + 306, 306, 306, 306, 306, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 306, 330, -1, -1, -1, 330, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 68, 330, -1, -1, -1, 330, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 69, 322, + 322, 322, -1, 322, -1, -1, 322, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 67, 94, 44, 
-1, -1, -1, -1, -1, + -1, 52, -1, -1, -1, -1, 14, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 45, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 39, 46, -1, + -1, -1, -1, -1, 318, 320, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 50, 48, 49, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 65, -1, -1, 47, -1, -1, + -1, -1, -1, -1, -1, 37, 38, 193, + 41, -1, 42, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 43, -1, -1, + -1, 300, 304, -1, -1, 51, 160, 70, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 171, 44, + -1, -1, -1, -1, -1, -1, 52, -1, + -1, -1, -1, 14, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 45, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 40, 46, -1, -1, -1, -1, + -1, 318, 320, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 50, 48, 49, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 4, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 154, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 176, -1, 175, -1, -1, + -1, -1, -1, -1, 156, 97, -1, 
96, + -1, -1, 47, -1, -1, 95, 174, -1, + -1, -1, 37, 38, 193, 41, -1, 42, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 43, -1, -1, -1, 300, 304, + -1, -1, 51, 345, 356, 356, 356, -1, + 356, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 356, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 356, -1, -1, -1, -1, -1, -1, 356, + 356, -1, -1, -1, -1, -1, -1, -1, + -1, 356, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 356, 356, + 356, 356, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 356, 356, -1, -1, -1, 356, 356, 356, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 98, 100, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 381, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 380, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 
-1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 99, -1, -1, + -1, -1, 64, 104, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 381, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 380, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 103, -1, + -1, -1, -1, 64, 102, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 381, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 380, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 
-1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 101, + -1, -1, -1, -1, 64, 353, 354, 376, + 383, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 105, 313, -1, + -1, 70, 44, -1, -1, -1, -1, -1, + -1, 52, -1, -1, -1, -1, 14, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 45, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 39, 46, -1, + -1, -1, -1, -1, 318, 320, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 50, 48, 49, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 47, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 194, + 41, -1, 42, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 43, -1, -1, + -1, 300, 304, -1, -1, 51, 
311, 107, + 108, -1, 327, -1, -1, 328, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 321, 106, 309, -1, -1, -1, 109, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 329, 310, -1, + -1, -1, 109, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 329, 44, -1, -1, -1, -1, -1, -1, + 52, -1, -1, -1, -1, 14, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 45, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 39, 46, -1, -1, + -1, -1, -1, 318, 320, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 50, 48, 49, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 
-1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 47, -1, -1, -1, + -1, -1, -1, -1, 110, 38, 193, 41, + -1, 42, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 43, -1, -1, -1, + 300, 304, -1, -1, 51, 44, -1, -1, + -1, -1, -1, -1, 52, -1, -1, -1, + -1, 14, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 45, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 39, 46, -1, -1, -1, -1, -1, 318, + 320, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 50, 48, + 49, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 47, -1, -1, -1, -1, -1, -1, -1, + -1, 113, 193, 41, -1, 42, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 43, -1, -1, -1, 300, 304, -1, -1, + 51, 44, -1, -1, -1, -1, -1, -1, + 52, -1, -1, -1, -1, 14, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 45, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 39, 46, -1, -1, + -1, -1, -1, 318, 320, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 50, 48, 49, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 
-1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 47, -1, -1, -1, + -1, -1, -1, -1, -1, 111, 193, 41, + -1, 42, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 43, -1, -1, -1, + 300, 304, -1, -1, 51, 44, -1, -1, + -1, -1, -1, -1, 52, -1, -1, -1, + -1, 14, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 45, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 39, 46, -1, -1, -1, -1, -1, 318, + 320, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 50, 48, + 49, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 47, -1, -1, -1, -1, -1, -1, -1, + -1, 112, 193, 41, -1, 42, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 43, -1, -1, -1, 300, 304, -1, -1, + 51, 44, -1, -1, -1, -1, -1, -1, + 52, -1, -1, -1, -1, 14, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 45, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 39, 46, -1, -1, + -1, -1, -1, 318, 320, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 
+ -1, -1, 50, 48, 49, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 47, -1, -1, -1, + -1, -1, -1, -1, -1, 114, 193, 41, + -1, 42, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 43, -1, -1, -1, + 300, 304, -1, -1, 51, 44, -1, -1, + -1, -1, -1, -1, 52, -1, -1, -1, + -1, 14, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 45, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 39, 46, -1, -1, -1, -1, -1, 318, + 320, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 50, 48, + 49, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 47, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 189, 41, -1, 42, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 43, -1, -1, -1, 300, 304, -1, -1, + 51, 44, -1, -1, -1, -1, -1, -1, + 52, -1, -1, -1, -1, 14, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 45, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 39, 46, -1, -1, + -1, -1, -1, 318, 320, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 
-1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 50, 48, 49, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 47, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 190, 41, + -1, 42, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 43, -1, -1, -1, + 300, 304, -1, -1, 51, 44, -1, -1, + -1, -1, -1, -1, 52, -1, -1, -1, + -1, 14, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 45, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 39, 46, -1, -1, -1, -1, -1, 318, + 320, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 50, 48, + 49, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 47, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 191, 41, -1, 42, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 43, -1, -1, -1, 300, 304, -1, -1, + 51, 44, -1, -1, -1, -1, -1, -1, + 52, -1, -1, -1, -1, 14, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 45, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 39, 46, -1, -1, + -1, -1, -1, 318, 320, -1, -1, 
-1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 50, 48, 49, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 47, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 192, 41, + -1, 42, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 43, -1, -1, -1, + 300, 304, -1, -1, 51, 378, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 196, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 115, 116, -1, -1, 118, -1, 119, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 117, -1, -1, + -1, -1, -1, -1, -1, -1, 284, -1, + -1, -1, -1, -1, -1, 288, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 
-1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 198, 282, -1, -1, + -1, -1, -1, -1, -1, 199, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 280, + 287, 120, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 117, -1, -1, -1, + -1, -1, -1, -1, -1, 284, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 201, 282, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 280, 120, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 117, -1, -1, -1, -1, -1, + -1, -1, -1, 284, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 
-1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 202, 282, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 280, 120, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 117, -1, -1, -1, -1, -1, -1, -1, + -1, 284, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 203, + 282, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 280, 120, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 117, -1, + -1, -1, -1, -1, -1, -1, -1, 284, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 
-1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 204, 282, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 280, 120, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 117, -1, -1, -1, + -1, -1, -1, -1, -1, 284, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 205, 282, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 280, 121, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 117, -1, -1, -1, -1, -1, + -1, -1, -1, 284, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 
-1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 206, 282, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 280, 209, -1, -1, + 209, -1, 209, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 209, -1, -1, -1, -1, -1, -1, -1, + -1, 209, -1, -1, -1, -1, -1, -1, + 209, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 253, 265, 277, 229, 241, 210, -1, -1, + 210, -1, 210, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 210, -1, -1, -1, -1, -1, -1, -1, + -1, 210, -1, -1, -1, -1, -1, -1, + 210, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 218, + 251, 263, 275, 227, 239, 211, -1, -1, + 211, -1, 211, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 211, -1, -1, -1, -1, -1, -1, -1, + -1, 211, -1, 
-1, -1, -1, -1, -1, + 211, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 216, + 249, 261, 273, 225, 237, 212, -1, -1, + 212, -1, 212, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 212, -1, -1, -1, -1, -1, -1, -1, + -1, 212, -1, -1, -1, -1, -1, -1, + 212, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 214, + 245, 257, 269, 221, 233, 247, 259, 271, + 223, 235, 255, 267, 279, 231, 243, 123, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 301, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 
-1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 122, 14, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 315, -1, -1, -1, -1, + -1, 318, 320, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 317, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 312, 44, -1, -1, -1, -1, + -1, -1, 52, -1, 127, -1, -1, 14, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 45, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 40, 46, + -1, -1, -1, -1, -1, 318, 320, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 50, 48, 49, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 4, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 154, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 176, + -1, 175, -1, -1, -1, -1, -1, -1, + 156, 97, -1, 96, -1, -1, 47, -1, + -1, -1, 173, -1, -1, 
-1, 37, 38, + 193, 41, -1, 42, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 43, -1, + -1, -1, 300, 304, -1, -1, 51, 70, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 180, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 117, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 126, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 125, -1, 179, 161, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 124, 368, 369, 370, -1, + 367, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 346, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 366, -1, -1, -1, -1, -1, -1, 364, + 365, -1, -1, -1, -1, -1, -1, -1, + -1, 371, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 
-1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 360, 361, + 362, 363, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 372, 373, -1, -1, -1, 374, 375, 28, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 357, -1, + 359, -1, 355, 358, 347, 356, 356, 356, + -1, 356, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 356, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 356, -1, -1, -1, -1, -1, -1, + 356, 356, -1, -1, -1, -1, -1, -1, + -1, -1, 356, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 356, + 356, 356, 356, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 356, 356, -1, -1, -1, 356, 356, + 356, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 128, 351, 356, 356, 356, -1, + 356, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 356, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 356, -1, -1, -1, -1, -1, -1, 356, + 356, -1, -1, -1, -1, -1, -1, -1, + -1, 356, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 
-1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 356, 356, + 356, 356, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 356, 356, -1, -1, -1, 356, 356, 356, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 129, 349, 356, 356, 356, -1, 356, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 356, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 356, + -1, -1, -1, -1, -1, -1, 356, 356, + -1, -1, -1, -1, -1, -1, -1, -1, + 356, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 356, 356, 356, + 356, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 356, + 356, -1, -1, -1, 356, 356, 356, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 130, 379, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 379, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 131, 324, 324, + 324, -1, 324, 323, -1, 324, 330, -1, + -1, -1, 330, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 
+ -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 132, 330, -1, -1, -1, + 330, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 133, 331, -1, -1, 134, 331, 72, + -1, -1, 181, -1, -1, 181, 73, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 181, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 181, 71, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 181, -1, -1, -1, + 74, 44, -1, -1, -1, -1, 184, -1, + 52, 184, -1, -1, 184, 16, 75, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 184, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 45, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 184, 184, -1, -1, -1, + -1, -1, -1, -1, 39, 46, -1, -1, + -1, -1, -1, 318, 320, -1, -1, 76, + 77, 78, -1, 184, -1, -1, -1, 184, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 50, 48, 49, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 47, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 188, 41, + -1, 42, -1, -1, -1, -1, -1, -1, + 
-1, -1, -1, -1, 43, -1, -1, -1, + 300, 304, -1, -1, 51, 44, -1, -1, + -1, -1, 185, -1, 52, 185, -1, -1, + 185, 17, 75, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 185, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 45, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 185, + 185, -1, -1, -1, -1, -1, -1, -1, + 39, 46, -1, -1, -1, -1, -1, 318, + 320, -1, -1, 76, 77, 78, -1, 185, + -1, -1, -1, 185, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 50, 48, + 49, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 47, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 188, 41, -1, 42, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 43, -1, -1, -1, 300, 304, -1, -1, + 51, 44, -1, -1, -1, -1, 183, -1, + 52, 183, -1, -1, 183, 15, 75, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 183, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 45, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 183, 183, -1, -1, -1, + -1, -1, -1, -1, 39, 46, -1, -1, + -1, -1, -1, 318, 320, -1, -1, 76, + 77, 78, -1, 183, -1, -1, -1, 183, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 50, 48, 49, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 
-1, -1, -1, -1, 47, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 188, 41, + -1, 42, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 43, -1, -1, -1, + 300, 304, -1, -1, 51, 44, -1, -1, + -1, -1, 186, -1, 52, 186, -1, -1, + 186, 18, 75, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 186, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 45, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 186, + 186, -1, -1, -1, -1, -1, -1, -1, + 39, 46, -1, -1, -1, -1, -1, 318, + 320, -1, -1, 76, 77, 78, -1, 186, + -1, -1, -1, 186, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 50, 48, + 49, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 47, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 188, 41, -1, 42, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 43, -1, -1, -1, 300, 304, -1, -1, + 51, 383, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 135, 138, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 
-1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 136, + -1, -1, -1, -1, -1, -1, -1, -1, + 137, 334, 334, 334, -1, 334, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 334, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 334, -1, + -1, -1, -1, -1, -1, 334, -1, -1, + -1, -1, -1, -1, 334, 334, -1, -1, + -1, -1, -1, -1, -1, -1, 334, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 334, 334, 334, 334, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 334, 334, 334, 334, 334, 334, + 334, 334, 334, 334, 334, 334, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 139, 289, 290, 284, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 137, 141, -1, -1, -1, -1, -1, -1, + 
-1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 140, -1, 137, 143, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 296, 301, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 142, 31, 177, 120, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 117, + -1, -1, -1, -1, -1, -1, -1, -1, + 284, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 
-1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 178, 282, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 280, 172, 368, 369, 370, -1, 367, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 348, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 366, + -1, -1, -1, -1, -1, -1, 364, 365, + -1, -1, -1, -1, -1, -1, -1, -1, + 371, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 360, 361, 362, + 363, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 372, + 373, -1, -1, -1, 374, 375, 28, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 357, -1, 359, + -1, 355, 358, 368, 369, 370, -1, 367, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 352, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 366, + -1, -1, -1, -1, -1, -1, 364, 365, + -1, -1, 
-1, -1, -1, -1, -1, -1, + 371, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 360, 361, 362, + 363, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 372, + 373, -1, -1, -1, 374, 375, 28, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 357, -1, 359, + -1, 355, 358, 368, 369, 370, -1, 367, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 350, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 366, + -1, -1, -1, -1, -1, -1, 364, 365, + -1, -1, -1, -1, -1, -1, -1, -1, + 371, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 360, 361, 362, + 363, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 372, + 373, -1, -1, -1, 374, 375, 28, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 357, -1, 359, + -1, 355, 358, 382, 325, -1, -1, -1, + 109, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 
+ -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 329, 326, + -1, -1, -1, 109, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 329, 332, 377, -1, -1, -1, -1, + 377, -1, 377, 377, -1, -1, 377, 377, + 377, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 377, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 377, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 377, 377, -1, + -1, -1, -1, -1, -1, -1, 377, 377, + -1, -1, -1, -1, -1, 377, 377, -1, + -1, 377, 377, 377, 377, 377, -1, 131, + -1, 377, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 377, 377, 377, 144, + 281, 283, -1, -1, 286, 340, 341, 342, + -1, 338, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 339, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 285, -1, -1, -1, -1, -1, + -1, 366, -1, -1, -1, -1, -1, -1, + 364, 365, -1, -1, -1, -1, -1, -1, + -1, -1, 343, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 360, + 361, 362, 363, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 62, 57, + 56, 372, 373, 58, 60, 61, 374, 375, + 28, 59, 
-1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 333, 337, 335, + 336, 344, 145, 283, -1, -1, 291, 297, + 298, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 301, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 146, 118, -1, 119, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 288, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 147, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 287, 120, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 
-1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 117, -1, -1, -1, -1, -1, + -1, -1, -1, 284, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 148, 282, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 280, 299, 200, 207, + +}; + +unsigned short Parser_keys[] = { + 132, 226, 224, 224, 132, 227, 132, 239, + 132, 240, 132, 132, 132, 132, 45, 244, + 40, 245, 40, 245, 132, 246, 123, 132, + 123, 123, 59, 132, 45, 244, 139, 139, + 40, 287, 132, 194, 40, 287, 125, 227, + 61, 137, 40, 243, 59, 59, 59, 59, + 40, 40, 40, 245, 125, 239, 33, 276, + 33, 276, 40, 284, 132, 290, 40, 287, + 44, 59, 38, 151, 33, 276, 33, 202, + 33, 188, 33, 266, 33, 202, 33, 276, + 33, 276, 33, 202, 33, 202, 189, 274, + 189, 274, 186, 275, 142, 142, 33, 276, + 59, 59, 44, 59, 33, 276, 59, 59, + 40, 245, 42, 290, 42, 290, 42, 290, + 59, 59, 59, 59, 41, 41, 132, 289, + 41, 44, 33, 276, 186, 278, 189, 279, + 189, 279, 33, 276, 33, 276, 33, 276, + 33, 276, 33, 276, 33, 276, 33, 276, + 33, 276, 33, 276, 132, 288, 40, 270, + 40, 269, 40, 269, 40, 269, 40, 269, + 40, 269, 40, 269, 40, 207, 40, 207, + 40, 207, 40, 207, 203, 207, 203, 207, + 44, 271, 45, 276, 33, 276, 44, 251, + 132, 240, 40, 287, 59, 59, 40, 245, + 59, 59, 40, 245, 59, 59, 40, 245, + 41, 149, 186, 193, 189, 274, 189, 274, + 189, 193, 38, 151, 33, 276, 33, 276, + 33, 
276, 33, 276, 132, 289, 132, 269, + 40, 243, 139, 139, 139, 139, 132, 269, + 132, 269, 44, 125, 139, 271, 61, 61, + 59, 59, 40, 269, 124, 124, 40, 287, + 40, 287, 40, 287, 132, 132, 189, 279, + 189, 279, 193, 193, 33, 188, 44, 44, + 41, 41, 41, 44, 40, 284, 44, 44, + 41, 44, 125, 125, 125, 271, 43, 270, + 40, 269, 125, 125, 41, 41, 41, 41, + 0, 0 +}; + +unsigned int Parser_offsets[] = { + 0, 95, 96, 192, 300, 409, 410, 411, + 611, 817, 1023, 1138, 1148, 1149, 1223, 1423, + 1424, 1672, 1735, 1983, 2086, 2163, 2367, 2368, + 2369, 2370, 2576, 2691, 2935, 3179, 3424, 3583, + 3831, 3847, 3961, 4205, 4375, 4531, 4765, 4935, + 5179, 5423, 5593, 5763, 5849, 5935, 6025, 6026, + 6270, 6271, 6287, 6531, 6532, 6738, 6987, 7236, + 7485, 7486, 7487, 7488, 7646, 7650, 7894, 7987, + 8078, 8169, 8413, 8657, 8901, 9145, 9389, 9633, + 9877, 10121, 10365, 10522, 10753, 10983, 11213, 11443, + 11673, 11903, 12133, 12301, 12469, 12637, 12805, 12810, + 12815, 13043, 13275, 13519, 13727, 13836, 14084, 14085, + 14291, 14292, 14498, 14499, 14705, 14814, 14822, 14908, + 14994, 14999, 15113, 15357, 15601, 15845, 16089, 16247, + 16385, 16589, 16590, 16591, 16729, 16867, 16949, 17082, + 17083, 17084, 17314, 17315, 17563, 17811, 18059, 18060, + 18151, 18242, 18243, 18399, 18400, 18401, 18405, 18650, + 18651, 18655, 18656, 18803, 19031, 19261, 19262, 19263, + 19264 +}; + +unsigned short Parser_targs[] = { + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 15, + 15, 15, 15, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 31, 32, 33, 34, 35, + 35, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, + 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, + 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, + 99, 100, 101, 102, 103, 104, 105, 106, + 107, 108, 109, 110, 111, 112, 113, 114, + 115, 116, 117, 118, 119, 120, 121, 122, + 123, 124, 125, 126, 127, 128, 129, 130, + 
131, 132, 133, 134, 135, 136, 137, 138, + 139, 140, 141, 142, 143, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144 +}; + +unsigned int Parser_actInds[] = { + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 33, 36, 39, 42, 45, 47, 49, 51, + 53, 55, 57, 59, 61, 63, 65, 67, + 69, 71, 73, 75, 77, 79, 81, 83, + 85, 88, 90, 92, 94, 96, 98, 100, + 102, 104, 106, 108, 110, 112, 114, 116, + 118, 120, 122, 124, 126, 128, 130, 132, + 134, 136, 138, 140, 142, 144, 146, 148, + 150, 152, 154, 156, 158, 160, 162, 164, + 166, 168, 170, 172, 174, 176, 178, 180, + 182, 184, 186, 188, 190, 192, 195, 197, + 199, 201, 203, 205, 207, 209, 211, 213, + 215, 217, 219, 221, 223, 225, 227, 229, + 231, 233, 235, 237, 239, 241, 243, 245, + 247, 249, 251, 253, 255, 
257, 259, 261, + 263, 265, 267, 269, 271, 273, 275, 277, + 279, 281, 283, 285, 287, 289, 291, 293, + 295, 297, 299, 301, 303, 305, 307, 309, + 311, 313, 315, 317, 319, 321, 323, 325, + 327, 329, 331, 333, 335, 337, 339, 341, + 343, 345, 347, 349, 351, 353, 355, 357, + 359, 361, 363, 365, 367, 369, 371, 373, + 375, 377, 379, 381, 383, 385, 387, 389, + 391, 393, 395, 397, 399, 401, 403, 405, + 407, 409, 411, 413, 415, 417, 419, 421, + 423, 425, 427, 429, 431, 433, 435, 437, + 439, 441, 443, 445, 447, 449, 451, 453, + 455, 457, 459, 461, 463, 465, 467, 469, + 471, 473, 475, 477, 479, 481, 483, 485, + 487, 489, 491, 493, 495, 497, 499, 501, + 503, 505, 507, 509, 511, 513, 515, 517, + 519, 521, 523, 525, 527, 529, 531, 533, + 535, 537, 539, 541, 543, 545, 547, 549, + 551, 553, 555, 557, 559, 561, 563, 565, + 567, 569, 571, 573, 575, 577, 579, 581, + 583, 585, 587, 589, 591, 593, 595, 597, + 599, 601, 603, 605, 607, 609, 611, 613, + 615, 617, 619, 621, 623, 625, 627, 629, + 631, 633, 635, 637, 639, 641, 643, 645, + 647, 649, 651, 653, 655, 657, 659, 661, + 663, 665, 667, 669, 671, 673, 675, 677, + 679, 681, 683, 685, 687, 689, 691, 693, + 695, 697, 699, 701, 703, 705, 707, 709, + 711, 713, 715, 717, 719, 721, 723, 725, + 727, 729, 731, 733, 735, 737, 739, 741, + 743, 745, 747, 749, 751, 753, 755, 757, + 759, 761, 763, 765, 767, 769, 771, 773, + 775, 777, 779, 781, 783, 785, 787, 789, + 791, 793 +}; + +unsigned int Parser_actions[] = { + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 170, 1, + 0, 174, 1, 0, 178, 1, 0, 182, + 1, 0, 186, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 66, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 
1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 270, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 2, 0, 7, 0, 10, 0, 15, + 0, 18, 0, 59, 0, 62, 0, 63, + 0, 66, 0, 71, 0, 75, 0, 79, + 0, 83, 0, 87, 0, 91, 0, 95, + 0, 99, 0, 103, 0, 107, 0, 111, + 0, 115, 0, 118, 0, 122, 0, 127, + 0, 131, 0, 135, 0, 139, 0, 143, + 0, 147, 0, 151, 0, 155, 0, 158, + 0, 162, 0, 166, 0, 170, 0, 174, + 0, 178, 0, 182, 0, 186, 0, 191, + 0, 195, 0, 199, 0, 203, 0, 207, + 0, 211, 0, 215, 0, 218, 0, 223, + 0, 226, 0, 231, 0, 235, 0, 239, + 0, 243, 0, 247, 0, 251, 0, 255, + 0, 259, 0, 263, 0, 267, 0, 270, + 0, 274, 0, 278, 0, 282, 0, 286, + 0, 291, 0, 295, 0, 299, 0, 303, + 0, 307, 0, 311, 0, 315, 0, 319, + 0, 323, 0, 327, 0, 331, 0, 335, + 0, 339, 0, 343, 0, 347, 0, 351, + 0, 355, 0, 359, 0, 363, 0, 367, + 0, 371, 0, 375, 0, 379, 0, 383, + 0, 387, 0, 391, 0, 395, 0, 399, + 0, 403, 0, 407, 0, 411, 0, 415, + 0, 419, 0, 423, 0, 427, 0, 431, + 0, 435, 0, 439, 0, 443, 0, 447, + 0, 451, 0, 455, 0, 459, 0, 463, + 0, 467, 0, 471, 0, 475, 0, 479, + 0, 483, 0, 487, 0, 491, 0, 495, + 0, 499, 0, 503, 0, 507, 0, 511, + 0, 515, 0, 519, 0, 523, 0, 527, + 0, 531, 0, 535, 0, 539, 0, 543, + 0, 547, 0, 551, 0, 555, 0, 559, + 0, 563, 0, 567, 0, 570, 0, 571, + 0, 575, 0, 578, 0, 583, 0, 587, + 0, 591, 0, 595, 0, 598, 0, 603, + 0, 607, 0, 611, 0, 615, 0, 619, + 0, 623, 0, 627, 0, 631, 0, 635, + 0, 639, 0, 643, 0, 647, 0, 651, + 0, 654, 0, 658, 0, 662, 0, 663, + 0, 667, 0, 671, 0, 675, 0, 679, + 0, 683, 0, 686, 0, 687, 0, 690, + 0, 691, 0, 695, 0, 699, 0, 703, + 0, 707, 0, 710, 0, 715, 0, 718, + 0, 723, 0, 727, 0, 731, 0, 735, + 0, 739, 0, 742, 0, 746, 0, 
751, + 0, 755, 0, 758, 0, 763, 0, 767, + 0, 771, 0, 775, 0, 779, 0, 783, + 0, 787, 0, 791, 0, 795, 0, 799, + 0, 803, 0, 807, 0, 811, 0, 815, + 0, 819, 0, 823, 0, 827, 0, 831, + 0, 835, 0, 839, 0, 843, 0, 846, + 0, 851, 0, 855, 0, 859, 0, 863, + 0, 867, 0, 871, 0, 875, 0, 879, + 0, 883, 0, 887, 0, 891, 0, 895, + 0, 899, 0, 903, 0, 907, 0, 911, + 0, 915, 0, 919, 0, 923, 0, 927, + 0, 930, 0, 934, 0, 938, 0, 943, + 0, 946, 0, 951, 0, 955, 0, 23, + 0, 27, 0, 31, 0, 35, 0, 39, + 0, 43, 0, 47, 0, 51, 0, 55, + 0, 1, 0 +}; + +int Parser_commitLen[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2 +}; + +unsigned int Parser_fssProdIdIndex[] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 
30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 91, 92, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, + 128, 129, 130, 131, 132, 133, 134, 135, + 136, 137, 138, 139, 140, 141, 142, 143, + 144, 145, 146, 147, 148, 149, 150, 151, + 152, 153, 154, 155, 156, 157, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 167, + 168, 169, 170, 171, 172, 173, 174, 175, + 176, 177, 178, 179, 180, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, 191, + 192, 193, 194, 195, 196, 197, 198, 199, + 200, 201, 202, 203, 204, 205, 206, 207, + 208, 209, 210, 211, 212, 213, 214, 215, + 216, 217, 218, 219, 220, 221, 222, 223, + 224, 225, 226, 227, 228, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239 +}; + +char Parser_fssProdLengths[] = { + 1, 3, 0, 2, 0, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 0, 4, 5, 5, 1, 5, 4, 3, + 4, 3, 3, 5, 2, 0, 1, 4, + 2, 1, 1, 1, 3, 2, 1, 0, + 3, 1, 3, 3, 3, 3, 1, 2, + 3, 3, 3, 3, 1, 3, 1, 3, + 1, 3, 3, 7, 3, 3, 3, 3, + 3, 3, 7, 1, 1, 1, 1, 1, + 1, 2, 1, 2, 1, 2, 1, 1, + 2, 1, 2, 1, 2, 1, 2, 1, + 2, 1, 2, 1, 2, 1, 2, 1, + 2, 1, 2, 1, 2, 1, 2, 1, + 2, 1, 2, 1, 2, 1, 2, 1, + 2, 1, 2, 1, 2, 1, 2, 1, + 2, 1, 2, 1, 2, 1, 2, 1, + 2, 1, 2, 1, 2, 1, 2, 1, + 2, 1, 2, 1, 3, 1, 1, 3, + 1, 1, 1, 2, 2, 1, 2, 2, + 2, 2, 4, 5, 5, 6, 1, 1, + 2, 2, 1, 1, 1, 1, 3, 3, + 3, 3, 3, 1, 1, 1, 2, 1, + 2, 0, 2, 1, 3, 3, 1, 1, + 2, 0, 1, 3, 2, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 2, 3, 3, 4, 3, 4, 3, 4, + 2, 2, 2, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 4, + 2, 0, 2, 1, 0, 3, 1, 1 +}; + +unsigned short Parser_prodLhsIds[] = { + 226, 225, 225, 227, 227, 228, 228, 228, + 228, 228, 228, 228, 228, 228, 238, 239, + 239, 237, 229, 
230, 240, 231, 232, 232, + 233, 234, 235, 236, 246, 246, 242, 242, + 247, 247, 248, 248, 248, 249, 249, 249, + 241, 241, 252, 252, 252, 252, 252, 253, + 253, 253, 253, 253, 253, 254, 254, 255, + 255, 257, 257, 257, 257, 257, 257, 257, + 257, 257, 257, 257, 258, 258, 258, 258, + 261, 261, 261, 261, 261, 261, 261, 262, + 262, 262, 262, 262, 262, 262, 262, 262, + 262, 262, 262, 263, 263, 263, 263, 263, + 263, 263, 263, 263, 263, 263, 263, 264, + 264, 264, 264, 264, 264, 264, 264, 264, + 264, 264, 264, 265, 265, 265, 265, 265, + 265, 265, 265, 265, 265, 265, 265, 266, + 266, 266, 266, 266, 266, 266, 266, 266, + 266, 266, 266, 250, 250, 250, 269, 251, + 260, 259, 270, 270, 270, 267, 268, 268, + 268, 268, 268, 268, 268, 268, 268, 271, + 272, 272, 272, 273, 273, 273, 273, 273, + 273, 273, 273, 276, 276, 244, 244, 244, + 275, 275, 277, 277, 278, 278, 278, 278, + 274, 274, 279, 279, 243, 243, 280, 280, + 280, 283, 283, 283, 283, 283, 283, 281, + 281, 281, 281, 281, 281, 281, 281, 281, + 281, 281, 245, 245, 286, 286, 286, 282, + 282, 282, 282, 282, 282, 282, 287, 287, + 287, 287, 287, 284, 284, 284, 284, 284, + 256, 288, 285, 290, 290, 289, 289, 291 +}; + +const char *Parser_prodNames[] = { + "start-1", + "section_list-1", + "section_list-2", + "statement_list-1", + "statement_list-2", + "statement-1", + "statement-2", + "statement-3", + "statement-4", + "statement-5", + "statement-6", + "statement-7", + "statement-8", + "statement-9", + "export_open-1", + "opt_export-1", + "opt_export-2", + "export_block-1", + "assignment-1", + "instantiation-1", + "machine_name-1", + "action_spec-1", + "alphtype_spec-1", + "alphtype_spec-2", + "range_spec-1", + "getkey_spec-1", + "access_spec-1", + "variable_spec-1", + "opt_whitespace-1", + "opt_whitespace-2", + "join_or_lm-1", + "join_or_lm-2", + "lm_part_list-1", + "lm_part_list-2", + "longest_match_part-1", + "longest_match_part-2", + "longest_match_part-3", + "opt_lm_part_action-1", + "opt_lm_part_action-2", + "opt_lm_part_action-3", + 
"join-1", + "join-2", + "expression-1", + "expression-2", + "expression-3", + "expression-4", + "expression-5", + "term-1", + "term-2", + "term-3", + "term-4", + "term-5", + "term-6", + "factor_with_label-1", + "factor_with_label-2", + "factor_with_ep-1", + "factor_with_ep-2", + "factor_with_aug-1", + "factor_with_aug-2", + "factor_with_aug-3", + "factor_with_aug-4", + "factor_with_aug-5", + "factor_with_aug-6", + "factor_with_aug-7", + "factor_with_aug-8", + "factor_with_aug-9", + "factor_with_aug-10", + "factor_with_aug-11", + "aug_type_base-1", + "aug_type_base-2", + "aug_type_base-3", + "aug_type_base-4", + "aug_type_cond-1", + "aug_type_cond-2", + "aug_type_cond-3", + "aug_type_cond-4", + "aug_type_cond-5", + "aug_type_cond-6", + "aug_type_cond-7", + "aug_type_to_state-1", + "aug_type_to_state-2", + "aug_type_to_state-3", + "aug_type_to_state-4", + "aug_type_to_state-5", + "aug_type_to_state-6", + "aug_type_to_state-7", + "aug_type_to_state-8", + "aug_type_to_state-9", + "aug_type_to_state-10", + "aug_type_to_state-11", + "aug_type_to_state-12", + "aug_type_from_state-1", + "aug_type_from_state-2", + "aug_type_from_state-3", + "aug_type_from_state-4", + "aug_type_from_state-5", + "aug_type_from_state-6", + "aug_type_from_state-7", + "aug_type_from_state-8", + "aug_type_from_state-9", + "aug_type_from_state-10", + "aug_type_from_state-11", + "aug_type_from_state-12", + "aug_type_eof-1", + "aug_type_eof-2", + "aug_type_eof-3", + "aug_type_eof-4", + "aug_type_eof-5", + "aug_type_eof-6", + "aug_type_eof-7", + "aug_type_eof-8", + "aug_type_eof-9", + "aug_type_eof-10", + "aug_type_eof-11", + "aug_type_eof-12", + "aug_type_gbl_error-1", + "aug_type_gbl_error-2", + "aug_type_gbl_error-3", + "aug_type_gbl_error-4", + "aug_type_gbl_error-5", + "aug_type_gbl_error-6", + "aug_type_gbl_error-7", + "aug_type_gbl_error-8", + "aug_type_gbl_error-9", + "aug_type_gbl_error-10", + "aug_type_gbl_error-11", + "aug_type_gbl_error-12", + "aug_type_local_error-1", + 
"aug_type_local_error-2", + "aug_type_local_error-3", + "aug_type_local_error-4", + "aug_type_local_error-5", + "aug_type_local_error-6", + "aug_type_local_error-7", + "aug_type_local_error-8", + "aug_type_local_error-9", + "aug_type_local_error-10", + "aug_type_local_error-11", + "aug_type_local_error-12", + "action_embed-1", + "action_embed-2", + "action_embed-3", + "action_embed_word-1", + "action_embed_block-1", + "priority_name-1", + "priority_aug-1", + "priority_aug_num-1", + "priority_aug_num-2", + "priority_aug_num-3", + "local_err_name-1", + "factor_with_rep-1", + "factor_with_rep-2", + "factor_with_rep-3", + "factor_with_rep-4", + "factor_with_rep-5", + "factor_with_rep-6", + "factor_with_rep-7", + "factor_with_rep-8", + "factor_with_rep-9", + "factor_rep_num-1", + "factor_with_neg-1", + "factor_with_neg-2", + "factor_with_neg-3", + "factor-1", + "factor-2", + "factor-3", + "factor-4", + "factor-5", + "factor-6", + "factor-7", + "factor-8", + "range_lit-1", + "range_lit-2", + "alphabet_num-1", + "alphabet_num-2", + "alphabet_num-3", + "regular_expr-1", + "regular_expr-2", + "regular_expr_item-1", + "regular_expr_item-2", + "regular_expr_char-1", + "regular_expr_char-2", + "regular_expr_char-3", + "regular_expr_char-4", + "regular_expr_or_data-1", + "regular_expr_or_data-2", + "regular_expr_or_char-1", + "regular_expr_or_char-2", + "inline_block-1", + "inline_block-2", + "inline_block_item-1", + "inline_block_item-2", + "inline_block_item-3", + "inline_block_symbol-1", + "inline_block_symbol-2", + "inline_block_symbol-3", + "inline_block_symbol-4", + "inline_block_symbol-5", + "inline_block_symbol-6", + "inline_block_interpret-1", + "inline_block_interpret-2", + "inline_block_interpret-3", + "inline_block_interpret-4", + "inline_block_interpret-5", + "inline_block_interpret-6", + "inline_block_interpret-7", + "inline_block_interpret-8", + "inline_block_interpret-9", + "inline_block_interpret-10", + "inline_block_interpret-11", + "inline_expr-1", + 
"inline_expr-2", + "inline_expr_item-1", + "inline_expr_item-2", + "inline_expr_item-3", + "inline_expr_any-1", + "inline_expr_any-2", + "inline_expr_any-3", + "inline_expr_any-4", + "inline_expr_any-5", + "inline_expr_any-6", + "inline_expr_any-7", + "inline_expr_symbol-1", + "inline_expr_symbol-2", + "inline_expr_symbol-3", + "inline_expr_symbol-4", + "inline_expr_symbol-5", + "inline_expr_interpret-1", + "inline_expr_interpret-2", + "inline_expr_interpret-3", + "inline_expr_interpret-4", + "inline_expr_interpret-5", + "local_state_ref-1", + "no_name_sep-1", + "state_ref-1", + "opt_name_sep-1", + "opt_name_sep-2", + "state_ref_names-1", + "state_ref_names-2", + "_start-1" +}; + +const char *Parser_lelNames[] = { + "D-0", + "D-1", + "D-2", + "D-3", + "D-4", + "D-5", + "D-6", + "D-7", + "D-8", + "D-9", + "D-10", + "D-11", + "D-12", + "D-13", + "D-14", + "D-15", + "D-16", + "D-17", + "D-18", + "D-19", + "D-20", + "D-21", + "D-22", + "D-23", + "D-24", + "D-25", + "D-26", + "D-27", + "D-28", + "D-29", + "D-30", + "D-31", + "D-32", + "!", + "\"", + "#", + "$", + "%", + "&", + "'", + "(", + ")", + "*", + "+", + ",", + "-", + ".", + "/", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + ":", + ";", + "<", + "=", + ">", + "?", + "@", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "[", + "\\", + "]", + "^", + "_", + "`", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "{", + "|", + "}", + "~", + "D-127", + "KW_Machine", + "KW_Include", + "KW_Import", + "KW_Write", + "TK_Word", + "TK_Literal", + "TK_Number", + "TK_Inline", + "TK_Reference", + "TK_ColonEquals", + "TK_EndSection", + "TK_UInt", + "TK_Hex", + "TK_BaseClause", + "TK_DotDot", + "TK_ColonGt", + "TK_ColonGtGt", + 
"TK_LtColon", + "TK_Arrow", + "TK_DoubleArrow", + "TK_StarStar", + "TK_NameSep", + "TK_BarStar", + "TK_DashDash", + "TK_StartCond", + "TK_AllCond", + "TK_LeavingCond", + "TK_Middle", + "TK_StartGblError", + "TK_AllGblError", + "TK_FinalGblError", + "TK_NotFinalGblError", + "TK_NotStartGblError", + "TK_MiddleGblError", + "TK_StartLocalError", + "TK_AllLocalError", + "TK_FinalLocalError", + "TK_NotFinalLocalError", + "TK_NotStartLocalError", + "TK_MiddleLocalError", + "TK_StartEOF", + "TK_AllEOF", + "TK_FinalEOF", + "TK_NotFinalEOF", + "TK_NotStartEOF", + "TK_MiddleEOF", + "TK_StartToState", + "TK_AllToState", + "TK_FinalToState", + "TK_NotFinalToState", + "TK_NotStartToState", + "TK_MiddleToState", + "TK_StartFromState", + "TK_AllFromState", + "TK_FinalFromState", + "TK_NotFinalFromState", + "TK_NotStartFromState", + "TK_MiddleFromState", + "RE_Slash", + "RE_SqOpen", + "RE_SqOpenNeg", + "RE_SqClose", + "RE_Dot", + "RE_Star", + "RE_Dash", + "RE_Char", + "IL_WhiteSpace", + "IL_Comment", + "IL_Literal", + "IL_Symbol", + "KW_Action", + "KW_AlphType", + "KW_Range", + "KW_GetKey", + "KW_When", + "KW_Eof", + "KW_Err", + "KW_Lerr", + "KW_To", + "KW_From", + "KW_Export", + "KW_Break", + "KW_Exec", + "KW_Hold", + "KW_PChar", + "KW_Char", + "KW_Goto", + "KW_Call", + "KW_Ret", + "KW_CurState", + "KW_TargState", + "KW_Entry", + "KW_Next", + "KW_Variable", + "KW_Access", + "TK_Semi", + "_eof", + "section_list", + "start", + "statement_list", + "statement", + "assignment", + "instantiation", + "action_spec", + "alphtype_spec", + "range_spec", + "getkey_spec", + "access_spec", + "variable_spec", + "export_block", + "export_open", + "opt_export", + "machine_name", + "join", + "join_or_lm", + "inline_block", + "alphabet_num", + "inline_expr", + "opt_whitespace", + "lm_part_list", + "longest_match_part", + "opt_lm_part_action", + "action_embed", + "action_embed_block", + "expression", + "term", + "factor_with_label", + "factor_with_ep", + "local_state_ref", + "factor_with_aug", + 
"aug_type_base", + "priority_aug", + "priority_name", + "aug_type_cond", + "aug_type_to_state", + "aug_type_from_state", + "aug_type_eof", + "aug_type_gbl_error", + "aug_type_local_error", + "local_err_name", + "factor_with_rep", + "action_embed_word", + "priority_aug_num", + "factor_rep_num", + "factor_with_neg", + "factor", + "regular_expr_or_data", + "regular_expr", + "range_lit", + "regular_expr_item", + "regular_expr_char", + "regular_expr_or_char", + "inline_block_item", + "inline_block_interpret", + "inline_expr_any", + "inline_block_symbol", + "inline_expr_interpret", + "state_ref", + "inline_expr_item", + "inline_expr_symbol", + "no_name_sep", + "state_ref_names", + "opt_name_sep", + "_start" +}; + +#line 1375 "rlparse.kl" + + +void Parser::init() +{ + #line 3769 "rlparse.cpp" + curs = Parser_startState; + pool = 0; + freshEl = (struct Parser_LangEl*) malloc( sizeof(struct Parser_LangEl)*8128); + #ifdef LOG_ACTIONS + cerr << "allocating 8128 LangEls" << endl; + #endif + stackTop = freshEl; + stackTop->type = 0; + stackTop->state = -1; + stackTop->next = 0; + stackTop->child = 0; + freshPos = 1; + lastFinal = stackTop; + numRetry = 0; + numNodes = 0; + errCount = 0; +#line 1380 "rlparse.kl" +} + +int Parser::parseLangEl( int type, const Token *token ) +{ + #line 3791 "rlparse.cpp" +#define reject() induceReject = 1 + + int pos, targState; + unsigned int *action; + int rhsLen; + struct Parser_LangEl *rhs[32]; + struct Parser_LangEl *lel; + struct Parser_LangEl *input; + char induceReject; + + if ( curs < 0 ) + return 0; + + if ( pool == 0 ) { + if ( freshPos == 8128 ) { + freshEl = (struct Parser_LangEl*) malloc( + sizeof(struct Parser_LangEl)*8128); + #ifdef LOG_ACTIONS + cerr << "allocating 8128 LangEls" << endl; + #endif + freshPos = 0; + } + input = freshEl + freshPos++; + } + else { + input = pool; + pool = pool->next; + } + numNodes += 1; + input->type = type; + input->user.token = *token; + input->next = 0; + input->retry = 0; + input->child = 0; + 
+again: + if ( input == 0 ) + goto _out; + + lel = input; + if ( lel->type < Parser_keys[curs<<1] || lel->type > Parser_keys[(curs<<1)+1] ) + goto parseError; + + pos = Parser_indicies[Parser_offsets[curs] + (lel->type - Parser_keys[curs<<1])]; + if ( pos < 0 ) + goto parseError; + + induceReject = 0; + targState = Parser_targs[pos]; + action = Parser_actions + Parser_actInds[pos]; + if ( lel->retry & 0x0000ffff ) + action += (lel->retry & 0x0000ffff); + + if ( *action & 0x1 ) { + #ifdef LOG_ACTIONS + cerr << "shifted: " << Parser_lelNames[lel->type]; + #endif + input = input->next; + lel->state = curs; + lel->next = stackTop; + stackTop = lel; + + if ( action[1] == 0 ) + lel->retry &= 0xffff0000; + else { + lel->retry += 1; + numRetry += 1; + #ifdef LOG_ACTIONS + cerr << " retry: " << stackTop; + #endif + } + #ifdef LOG_ACTIONS + cerr << endl; + #endif + } + + if ( Parser_commitLen[pos] != 0 ) { + struct Parser_LangEl *commitHead = stackTop; + int absCommitLen = Parser_commitLen[pos]; + + #ifdef LOG_ACTIONS + cerr << "running commit of length: " << Parser_commitLen[pos] << endl; + #endif + + if ( absCommitLen < 0 ) { + commitHead = commitHead->next; + absCommitLen = -1 * absCommitLen; + } + { + struct Parser_LangEl *lel = commitHead; + struct Parser_LangEl **cmStack = (struct Parser_LangEl**) malloc( sizeof(struct Parser_LangEl) * numNodes); + int n = absCommitLen, depth = 0, sp = 0; + +commit_head: + if ( lel->retry > 0 ) { + if ( lel->retry & 0x0000ffff ) + numRetry -= 1; + if ( lel->retry & 0xffff0000 ) + numRetry -= 1; + lel->retry = 0; + } + + /* If depth is > 0 then move over lel freely, otherwise, make + * sure that we have not already done n steps down the line. */ + if ( lel->next != 0 && ( depth > 0 || n > 1 ) ) { + cmStack[sp++] = lel; + lel = lel->next; + + /* If we are at the top level count the steps down the line. 
*/ + if ( depth == 0 ) + n -= 1; + goto commit_head; + } + +commit_reverse: + if ( lel->child != 0 ) { + cmStack[sp++] = lel; + lel = lel->child; + + /* When we move down we need to increment the depth. */ + depth += 1; + goto commit_head; + } + +commit_upwards: + if ( sp > 0 ) { + /* Figure out which place to return to. */ + if ( cmStack[sp-1]->next == lel ) { + lel = cmStack[--sp]; + goto commit_reverse; + } + else { + /* Going back up, adjust the depth. */ + lel = cmStack[--sp]; + depth -= 1; + goto commit_upwards; + } + } + free( cmStack ); + } + if ( numRetry == 0 ) { + #ifdef LOG_ACTIONS + cerr << "number of retries is zero, " + "executing final actions" << endl; + #endif + { + struct Parser_LangEl *lel = commitHead; + struct Parser_LangEl **cmStack = (struct Parser_LangEl**) malloc( sizeof( struct Parser_LangEl) * numNodes); + int sp = 0; + char doExec = 0; + +final_head: + if ( lel == lastFinal ) { + doExec = 1; + goto hit_final; + } + + if ( lel->next != 0 ) { + cmStack[sp++] = lel; + lel = lel->next; + goto final_head; + } + +final_reverse: + + if ( lel->child != 0 ) { + cmStack[sp++] = lel; + lel = lel->child; + goto final_head; + } + +final_upwards: + + if ( doExec ) { +{ + if ( lel->type < 225 ) { + } + else { + struct Parser_LangEl *redLel = lel; + if ( redLel->child != 0 ) { + int r = Parser_fssProdLengths[redLel->reduction] - 1; + struct Parser_LangEl *rhsEl = redLel->child; + while ( rhsEl != 0 ) { + rhs[r--] = rhsEl; + rhsEl = rhsEl->next; + } + } +switch ( lel->reduction ) { +case 14: { +#line 59 "rlparse.kl" + + exportContext.append( true ); + + +#line 3985 "rlparse.cpp" +} break; +case 15: { +#line 68 "rlparse.kl" + (&redLel->user.opt_export)->isSet = true; + +#line 3991 "rlparse.cpp" +} break; +case 16: { +#line 69 "rlparse.kl" + (&redLel->user.opt_export)->isSet = false; + +#line 3997 "rlparse.cpp" +} break; +case 17: { +#line 72 "rlparse.kl" + + exportContext.remove( exportContext.length()-1 ); + + +#line 4005 "rlparse.cpp" +} break; +case 
18: { +#line 77 "rlparse.kl" + + /* Main machine must be an instance. */ + bool isInstance = false; + if ( strcmp((&rhs[1]->user.token_type)->token.data, mainMachine) == 0 ) { + warning((&rhs[1]->user.token_type)->token.loc) << + "main machine will be implicitly instantiated" << endl; + isInstance = true; + } + + /* Generic creation of machine for instantiation and assignment. */ + JoinOrLm *joinOrLm = new JoinOrLm( (&rhs[3]->user.join)->join ); + tryMachineDef( (&rhs[1]->user.token_type)->token.loc, (&rhs[1]->user.token_type)->token.data, joinOrLm, isInstance ); + + if ( (&rhs[0]->user.opt_export)->isSet ) + exportContext.remove( exportContext.length()-1 ); + + +#line 4026 "rlparse.cpp" +} break; +case 19: { +#line 95 "rlparse.kl" + + /* Generic creation of machine for instantiation and assignment. */ + tryMachineDef( (&rhs[1]->user.token_type)->token.loc, (&rhs[1]->user.token_type)->token.data, (&rhs[3]->user.join_or_lm)->joinOrLm, true ); + + if ( (&rhs[0]->user.opt_export)->isSet ) + exportContext.remove( exportContext.length()-1 ); + + +#line 4038 "rlparse.cpp" +} break; +case 20: { +#line 111 "rlparse.kl" + + /* Make/get the priority key. The name may have already been referenced + * and therefore exist. */ + PriorDictEl *priorDictEl; + if ( pd->priorDict.insert( (&rhs[0]->user.token)->data, pd->nextPriorKey, &priorDictEl ) ) + pd->nextPriorKey += 1; + pd->curDefPriorKey = priorDictEl->value; + + /* Make/get the local error key. */ + LocalErrDictEl *localErrDictEl; + if ( pd->localErrDict.insert( (&rhs[0]->user.token)->data, pd->nextLocalErrKey, &localErrDictEl ) ) + pd->nextLocalErrKey += 1; + pd->curDefLocalErrKey = localErrDictEl->value; + + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + + +#line 4059 "rlparse.cpp" +} break; +case 21: { +#line 129 "rlparse.kl" + + if ( pd->actionDict.find( (&rhs[1]->user.token)->data ) ) { + /* Recover by just ignoring the duplicate. 
*/ + error((&rhs[1]->user.token)->loc) << "action \"" << (&rhs[1]->user.token)->data << "\" already defined" << endl; + } + else { + //cerr << "NEW ACTION " << $2->data << " " << $4->inlineList << endl; + /* Add the action to the list of actions. */ + Action *newAction = new Action( (&rhs[2]->user.token)->loc, (&rhs[1]->user.token)->data, + (&rhs[3]->user.inline_list)->inlineList, pd->nextCondId++ ); + + /* Insert to list and dict. */ + pd->actionList.append( newAction ); + pd->actionDict.insert( newAction ); + } + + +#line 4080 "rlparse.cpp" +} break; +case 22: { +#line 149 "rlparse.kl" + + if ( ! pd->setAlphType( (&rhs[1]->user.token)->data, (&rhs[2]->user.token)->data ) ) { + // Recover by ignoring the alphtype statement. + error((&rhs[1]->user.token)->loc) << "\"" << (&rhs[1]->user.token)->data << + " " << (&rhs[2]->user.token)->data << "\" is not a valid alphabet type" << endl; + } + + +#line 4092 "rlparse.cpp" +} break; +case 23: { +#line 158 "rlparse.kl" + + if ( ! pd->setAlphType( (&rhs[1]->user.token)->data ) ) { + // Recover by ignoring the alphtype statement. + error((&rhs[1]->user.token)->loc) << "\"" << (&rhs[1]->user.token)->data << + "\" is not a valid alphabet type" << endl; + } + + +#line 4104 "rlparse.cpp" +} break; +case 24: { +#line 168 "rlparse.kl" + + // Save the upper and lower ends of the range and emit the line number. 
+ pd->lowerNum = (&rhs[1]->user.token_type)->token.data; + pd->upperNum = (&rhs[2]->user.token_type)->token.data; + pd->rangeLowLoc = (&rhs[1]->user.token_type)->token.loc; + pd->rangeHighLoc = (&rhs[2]->user.token_type)->token.loc; + + +#line 4116 "rlparse.cpp" +} break; +case 25: { +#line 177 "rlparse.kl" + + pd->getKeyExpr = (&rhs[1]->user.inline_list)->inlineList; + + +#line 4124 "rlparse.cpp" +} break; +case 26: { +#line 182 "rlparse.kl" + + pd->accessExpr = (&rhs[1]->user.inline_list)->inlineList; + + +#line 4132 "rlparse.cpp" +} break; +case 27: { +#line 187 "rlparse.kl" + + /* FIXME: Need to implement the rest of this. */ + if ( strcmp( (&rhs[2]->user.token)->data, "curstate" ) == 0 ) + pd->curStateExpr = (&rhs[3]->user.inline_list)->inlineList; + else { + error((&rhs[2]->user.token)->loc) << "sorry, unimplementd" << endl; + } + + +#line 4145 "rlparse.cpp" +} break; +case 30: { +#line 209 "rlparse.kl" + + (&redLel->user.join_or_lm)->joinOrLm = new JoinOrLm( (&rhs[0]->user.join)->join ); + + +#line 4153 "rlparse.cpp" +} break; +case 31: { +#line 213 "rlparse.kl" + + /* Create a new factor going to a longest match structure. Record + * in the parse data that we have a longest match. */ + LongestMatch *lm = new LongestMatch( (&rhs[0]->user.token)->loc, (&rhs[1]->user.lm_part_list)->lmPartList ); + pd->lmList.append( lm ); + for ( LmPartList::Iter lmp = *((&rhs[1]->user.lm_part_list)->lmPartList); lmp.lte(); lmp++ ) + lmp->longestMatch = lm; + (&redLel->user.join_or_lm)->joinOrLm = new JoinOrLm( lm ); + + +#line 4167 "rlparse.cpp" +} break; +case 32: { +#line 229 "rlparse.kl" + + if ( (&rhs[1]->user.longest_match_part)->lmPart != 0 ) + (&rhs[0]->user.lm_part_list)->lmPartList->append( (&rhs[1]->user.longest_match_part)->lmPart ); + (&redLel->user.lm_part_list)->lmPartList = (&rhs[0]->user.lm_part_list)->lmPartList; + + +#line 4177 "rlparse.cpp" +} break; +case 33: { +#line 235 "rlparse.kl" + + /* Create a new list with the part. 
*/ + (&redLel->user.lm_part_list)->lmPartList = new LmPartList; + if ( (&rhs[0]->user.longest_match_part)->lmPart != 0 ) + (&redLel->user.lm_part_list)->lmPartList->append( (&rhs[0]->user.longest_match_part)->lmPart ); + + +#line 4188 "rlparse.cpp" +} break; +case 34: { +#line 248 "rlparse.kl" + (&redLel->user.longest_match_part)->lmPart = 0; + +#line 4194 "rlparse.cpp" +} break; +case 35: { +#line 250 "rlparse.kl" + (&redLel->user.longest_match_part)->lmPart = 0; + +#line 4200 "rlparse.cpp" +} break; +case 36: { +#line 252 "rlparse.kl" + + (&redLel->user.longest_match_part)->lmPart = 0; + Action *action = (&rhs[1]->user.opt_lm_part_action)->action; + if ( action != 0 ) + action->isLmAction = true; + (&redLel->user.longest_match_part)->lmPart = new LongestMatchPart( (&rhs[0]->user.join)->join, action, + (&rhs[2]->user.token)->loc, pd->nextLongestMatchId++ ); + + +#line 4213 "rlparse.cpp" +} break; +case 37: { +#line 267 "rlparse.kl" + + (&redLel->user.opt_lm_part_action)->action = (&rhs[1]->user.action_ref)->action; + + +#line 4221 "rlparse.cpp" +} break; +case 38: { +#line 271 "rlparse.kl" + + (&redLel->user.opt_lm_part_action)->action = (&rhs[0]->user.action_ref)->action; + + +#line 4229 "rlparse.cpp" +} break; +case 39: { +#line 275 "rlparse.kl" + + (&redLel->user.opt_lm_part_action)->action = 0; + + +#line 4237 "rlparse.cpp" +} break; +case 40: { +#line 286 "rlparse.kl" + + /* Append the expression to the list and return it. 
*/ + (&rhs[0]->user.join)->join->exprList.append( (&rhs[2]->user.expression)->expression ); + (&redLel->user.join)->join = (&rhs[0]->user.join)->join; + + +#line 4247 "rlparse.cpp" +} break; +case 41: { +#line 292 "rlparse.kl" + + (&redLel->user.join)->join = new Join( (&rhs[0]->user.expression)->expression ); + + +#line 4255 "rlparse.cpp" +} break; +case 42: { +#line 302 "rlparse.kl" + + (&redLel->user.expression)->expression = new Expression( (&rhs[0]->user.expression)->expression, + (&rhs[2]->user.term)->term, Expression::OrType ); + + +#line 4264 "rlparse.cpp" +} break; +case 43: { +#line 307 "rlparse.kl" + + (&redLel->user.expression)->expression = new Expression( (&rhs[0]->user.expression)->expression, + (&rhs[2]->user.term)->term, Expression::IntersectType ); + + +#line 4273 "rlparse.cpp" +} break; +case 44: { +#line 314 "rlparse.kl" + + (&redLel->user.expression)->expression = new Expression( (&rhs[0]->user.expression)->expression, + (&rhs[2]->user.term)->term, Expression::SubtractType ); + + +#line 4282 "rlparse.cpp" +} break; +case 45: { +#line 319 "rlparse.kl" + + (&redLel->user.expression)->expression = new Expression( (&rhs[0]->user.expression)->expression, + (&rhs[2]->user.term)->term, Expression::StrongSubtractType ); + + +#line 4291 "rlparse.cpp" +} break; +case 46: { +#line 324 "rlparse.kl" + + (&redLel->user.expression)->expression = new Expression( (&rhs[0]->user.term)->term ); + + +#line 4299 "rlparse.cpp" +} break; +case 47: { +#line 334 "rlparse.kl" + + (&redLel->user.term)->term = new Term( (&rhs[0]->user.term)->term, (&rhs[1]->user.factor_with_label)->factorWithAug ); + + +#line 4307 "rlparse.cpp" +} break; +case 48: { +#line 338 "rlparse.kl" + + (&redLel->user.term)->term = new Term( (&rhs[0]->user.term)->term, (&rhs[2]->user.factor_with_label)->factorWithAug ); + + +#line 4315 "rlparse.cpp" +} break; +case 49: { +#line 342 "rlparse.kl" + + (&redLel->user.term)->term = new Term( (&rhs[0]->user.term)->term, 
(&rhs[2]->user.factor_with_label)->factorWithAug, Term::RightStartType ); + + +#line 4323 "rlparse.cpp" +} break; +case 50: { +#line 346 "rlparse.kl" + + (&redLel->user.term)->term = new Term( (&rhs[0]->user.term)->term, (&rhs[2]->user.factor_with_label)->factorWithAug, Term::RightFinishType ); + + +#line 4331 "rlparse.cpp" +} break; +case 51: { +#line 350 "rlparse.kl" + + (&redLel->user.term)->term = new Term( (&rhs[0]->user.term)->term, + (&rhs[2]->user.factor_with_label)->factorWithAug, Term::LeftType ); + + +#line 4340 "rlparse.cpp" +} break; +case 52: { +#line 355 "rlparse.kl" + + (&redLel->user.term)->term = new Term( (&rhs[0]->user.factor_with_label)->factorWithAug ); + + +#line 4348 "rlparse.cpp" +} break; +case 53: { +#line 365 "rlparse.kl" + + /* Add the label to the list and pass the factor up. */ + (&rhs[2]->user.factor_with_label)->factorWithAug->labels.prepend( Label((&rhs[0]->user.token)->loc, (&rhs[0]->user.token)->data) ); + (&redLel->user.factor_with_label)->factorWithAug = (&rhs[2]->user.factor_with_label)->factorWithAug; + + +#line 4358 "rlparse.cpp" +} break; +case 54: { +#line 371 "rlparse.kl" + + (&redLel->user.factor_with_label)->factorWithAug = (&rhs[0]->user.factor_with_ep)->factorWithAug; + + +#line 4366 "rlparse.cpp" +} break; +case 55: { +#line 381 "rlparse.kl" + + /* Add the target to the list and return the factor object. */ + (&rhs[0]->user.factor_with_ep)->factorWithAug->epsilonLinks.append( EpsilonLink( (&rhs[1]->user.token)->loc, nameRef ) ); + (&redLel->user.factor_with_ep)->factorWithAug = (&rhs[0]->user.factor_with_ep)->factorWithAug; + + +#line 4376 "rlparse.cpp" +} break; +case 56: { +#line 387 "rlparse.kl" + + (&redLel->user.factor_with_ep)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug; + + +#line 4384 "rlparse.cpp" +} break; +case 57: { +#line 397 "rlparse.kl" + + /* Append the action to the factorWithAug, record the refernce from + * factorWithAug to the action and pass up the factorWithAug. 
*/ + (&rhs[0]->user.factor_with_aug)->factorWithAug->actions.append( + ParserAction( (&rhs[1]->user.aug_type)->loc, (&rhs[1]->user.aug_type)->augType, 0, (&rhs[2]->user.action_ref)->action ) ); + (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug; + + +#line 4396 "rlparse.cpp" +} break; +case 58: { +#line 405 "rlparse.kl" + + /* Append the named priority to the factorWithAug and pass it up. */ + (&rhs[0]->user.factor_with_aug)->factorWithAug->priorityAugs.append( + PriorityAug( (&rhs[1]->user.aug_type)->augType, pd->curDefPriorKey, (&rhs[2]->user.priority_aug)->priorityNum ) ); + (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug; + + +#line 4407 "rlparse.cpp" +} break; +case 59: { +#line 412 "rlparse.kl" + + /* Append the priority using a default name. */ + (&rhs[0]->user.factor_with_aug)->factorWithAug->priorityAugs.append( + PriorityAug( (&rhs[1]->user.aug_type)->augType, (&rhs[3]->user.priority_name)->priorityName, (&rhs[5]->user.priority_aug)->priorityNum ) ); + (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug; + + +#line 4418 "rlparse.cpp" +} break; +case 60: { +#line 419 "rlparse.kl" + + (&rhs[0]->user.factor_with_aug)->factorWithAug->conditions.append( ParserAction( (&rhs[1]->user.aug_type)->loc, + (&rhs[1]->user.aug_type)->augType, 0, (&rhs[2]->user.action_ref)->action ) ); + (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug; + + +#line 4428 "rlparse.cpp" +} break; +case 61: { +#line 425 "rlparse.kl" + + /* Append the action, pass it up. 
*/ + (&rhs[0]->user.factor_with_aug)->factorWithAug->actions.append( ParserAction( (&rhs[1]->user.aug_type)->loc, + (&rhs[1]->user.aug_type)->augType, 0, (&rhs[2]->user.action_ref)->action ) ); + (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug; + + +#line 4439 "rlparse.cpp" +} break; +case 62: { +#line 432 "rlparse.kl" + + /* Append the action, pass it up. */ + (&rhs[0]->user.factor_with_aug)->factorWithAug->actions.append( ParserAction( (&rhs[1]->user.aug_type)->loc, + (&rhs[1]->user.aug_type)->augType, 0, (&rhs[2]->user.action_ref)->action ) ); + (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug; + + +#line 4450 "rlparse.cpp" +} break; +case 63: { +#line 439 "rlparse.kl" + + /* Append the action, pass it up. */ + (&rhs[0]->user.factor_with_aug)->factorWithAug->actions.append( ParserAction( (&rhs[1]->user.aug_type)->loc, + (&rhs[1]->user.aug_type)->augType, 0, (&rhs[2]->user.action_ref)->action ) ); + (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug; + + +#line 4461 "rlparse.cpp" +} break; +case 64: { +#line 446 "rlparse.kl" + + /* Append the action to the factorWithAug, record the refernce from + * factorWithAug to the action and pass up the factorWithAug. */ + (&rhs[0]->user.factor_with_aug)->factorWithAug->actions.append( ParserAction( (&rhs[1]->user.aug_type)->loc, + (&rhs[1]->user.aug_type)->augType, pd->curDefLocalErrKey, (&rhs[2]->user.action_ref)->action ) ); + (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug; + + +#line 4473 "rlparse.cpp" +} break; +case 65: { +#line 454 "rlparse.kl" + + /* Append the action to the factorWithAug, record the refernce from + * factorWithAug to the action and pass up the factorWithAug. 
*/ + (&rhs[0]->user.factor_with_aug)->factorWithAug->actions.append( ParserAction( (&rhs[1]->user.aug_type)->loc, + (&rhs[1]->user.aug_type)->augType, pd->curDefLocalErrKey, (&rhs[2]->user.action_ref)->action ) ); + (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug; + + +#line 4485 "rlparse.cpp" +} break; +case 66: { +#line 462 "rlparse.kl" + + /* Append the action to the factorWithAug, record the refernce from + * factorWithAug to the action and pass up the factorWithAug. */ + (&rhs[0]->user.factor_with_aug)->factorWithAug->actions.append( ParserAction( (&rhs[1]->user.aug_type)->loc, + (&rhs[1]->user.aug_type)->augType, (&rhs[3]->user.local_err_name)->error_name, (&rhs[5]->user.action_ref)->action ) ); + (&redLel->user.factor_with_aug)->factorWithAug = (&rhs[0]->user.factor_with_aug)->factorWithAug; + + +#line 4497 "rlparse.cpp" +} break; +case 67: { +#line 470 "rlparse.kl" + + (&redLel->user.factor_with_aug)->factorWithAug = new FactorWithAug( (&rhs[0]->user.factor_with_rep)->factorWithRep ); + + +#line 4505 "rlparse.cpp" +} break; +case 68: { +#line 483 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_finish; + +#line 4511 "rlparse.cpp" +} break; +case 69: { +#line 484 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_leave; + +#line 4517 "rlparse.cpp" +} break; +case 70: { +#line 485 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all; + +#line 4523 "rlparse.cpp" +} break; +case 71: { +#line 486 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start; + +#line 4529 "rlparse.cpp" +} break; +case 72: { +#line 491 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start; + +#line 4535 "rlparse.cpp" +} 
break; +case 73: { +#line 492 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start; + +#line 4541 "rlparse.cpp" +} break; +case 74: { +#line 493 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all; + +#line 4547 "rlparse.cpp" +} break; +case 75: { +#line 494 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all; + +#line 4553 "rlparse.cpp" +} break; +case 76: { +#line 495 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_leave; + +#line 4559 "rlparse.cpp" +} break; +case 77: { +#line 496 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_leave; + +#line 4565 "rlparse.cpp" +} break; +case 78: { +#line 497 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all; + +#line 4571 "rlparse.cpp" +} break; +case 79: { +#line 506 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_to_state; + +#line 4577 "rlparse.cpp" +} break; +case 80: { +#line 508 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_to_state; + +#line 4583 "rlparse.cpp" +} break; +case 81: { +#line 511 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_to_state; + +#line 4589 "rlparse.cpp" +} break; +case 82: { +#line 513 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_to_state; + +#line 4595 "rlparse.cpp" +} break; +case 83: { +#line 516 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = 
at_all_to_state; + +#line 4601 "rlparse.cpp" +} break; +case 84: { +#line 518 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_to_state; + +#line 4607 "rlparse.cpp" +} break; +case 85: { +#line 521 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_to_state; + +#line 4613 "rlparse.cpp" +} break; +case 86: { +#line 523 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_to_state; + +#line 4619 "rlparse.cpp" +} break; +case 87: { +#line 526 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_to_state; + +#line 4625 "rlparse.cpp" +} break; +case 88: { +#line 528 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_to_state; + +#line 4631 "rlparse.cpp" +} break; +case 89: { +#line 531 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_to_state; + +#line 4637 "rlparse.cpp" +} break; +case 90: { +#line 533 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_to_state; + +#line 4643 "rlparse.cpp" +} break; +case 91: { +#line 542 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_from_state; + +#line 4649 "rlparse.cpp" +} break; +case 92: { +#line 544 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_from_state; + +#line 4655 "rlparse.cpp" +} break; +case 93: { +#line 547 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_from_state; + +#line 4661 "rlparse.cpp" +} break; +case 94: { 
+#line 549 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_from_state; + +#line 4667 "rlparse.cpp" +} break; +case 95: { +#line 552 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_from_state; + +#line 4673 "rlparse.cpp" +} break; +case 96: { +#line 554 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_from_state; + +#line 4679 "rlparse.cpp" +} break; +case 97: { +#line 557 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_from_state; + +#line 4685 "rlparse.cpp" +} break; +case 98: { +#line 559 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_from_state; + +#line 4691 "rlparse.cpp" +} break; +case 99: { +#line 562 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_from_state; + +#line 4697 "rlparse.cpp" +} break; +case 100: { +#line 564 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_from_state; + +#line 4703 "rlparse.cpp" +} break; +case 101: { +#line 567 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_from_state; + +#line 4709 "rlparse.cpp" +} break; +case 102: { +#line 569 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_from_state; + +#line 4715 "rlparse.cpp" +} break; +case 103: { +#line 578 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_eof; + +#line 4721 "rlparse.cpp" +} break; +case 104: { +#line 580 "rlparse.kl" + (&redLel->user.aug_type)->loc = 
(&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_eof; + +#line 4727 "rlparse.cpp" +} break; +case 105: { +#line 583 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_eof; + +#line 4733 "rlparse.cpp" +} break; +case 106: { +#line 585 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_eof; + +#line 4739 "rlparse.cpp" +} break; +case 107: { +#line 588 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_eof; + +#line 4745 "rlparse.cpp" +} break; +case 108: { +#line 590 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_eof; + +#line 4751 "rlparse.cpp" +} break; +case 109: { +#line 593 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_eof; + +#line 4757 "rlparse.cpp" +} break; +case 110: { +#line 595 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_eof; + +#line 4763 "rlparse.cpp" +} break; +case 111: { +#line 598 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_eof; + +#line 4769 "rlparse.cpp" +} break; +case 112: { +#line 600 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_eof; + +#line 4775 "rlparse.cpp" +} break; +case 113: { +#line 603 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_eof; + +#line 4781 "rlparse.cpp" +} break; +case 114: { +#line 605 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_eof; + +#line 4787 "rlparse.cpp" +} break; 
+case 115: { +#line 614 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_gbl_error; + +#line 4793 "rlparse.cpp" +} break; +case 116: { +#line 616 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_gbl_error; + +#line 4799 "rlparse.cpp" +} break; +case 117: { +#line 619 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_gbl_error; + +#line 4805 "rlparse.cpp" +} break; +case 118: { +#line 621 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_gbl_error; + +#line 4811 "rlparse.cpp" +} break; +case 119: { +#line 624 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_gbl_error; + +#line 4817 "rlparse.cpp" +} break; +case 120: { +#line 626 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_gbl_error; + +#line 4823 "rlparse.cpp" +} break; +case 121: { +#line 629 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_gbl_error; + +#line 4829 "rlparse.cpp" +} break; +case 122: { +#line 631 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_gbl_error; + +#line 4835 "rlparse.cpp" +} break; +case 123: { +#line 634 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_gbl_error; + +#line 4841 "rlparse.cpp" +} break; +case 124: { +#line 636 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_gbl_error; + +#line 4847 "rlparse.cpp" +} break; +case 125: { +#line 639 "rlparse.kl" + 
(&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_gbl_error; + +#line 4853 "rlparse.cpp" +} break; +case 126: { +#line 641 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_gbl_error; + +#line 4859 "rlparse.cpp" +} break; +case 127: { +#line 651 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_local_error; + +#line 4865 "rlparse.cpp" +} break; +case 128: { +#line 653 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_start_local_error; + +#line 4871 "rlparse.cpp" +} break; +case 129: { +#line 656 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_local_error; + +#line 4877 "rlparse.cpp" +} break; +case 130: { +#line 658 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_start_local_error; + +#line 4883 "rlparse.cpp" +} break; +case 131: { +#line 661 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_local_error; + +#line 4889 "rlparse.cpp" +} break; +case 132: { +#line 663 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_all_local_error; + +#line 4895 "rlparse.cpp" +} break; +case 133: { +#line 666 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_local_error; + +#line 4901 "rlparse.cpp" +} break; +case 134: { +#line 668 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_final_local_error; + +#line 4907 "rlparse.cpp" +} break; +case 135: { +#line 671 "rlparse.kl" + (&redLel->user.aug_type)->loc = 
(&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_local_error; + +#line 4913 "rlparse.cpp" +} break; +case 136: { +#line 673 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_not_final_local_error; + +#line 4919 "rlparse.cpp" +} break; +case 137: { +#line 676 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_local_error; + +#line 4925 "rlparse.cpp" +} break; +case 138: { +#line 678 "rlparse.kl" + (&redLel->user.aug_type)->loc = (&rhs[0]->user.token)->loc; (&redLel->user.aug_type)->augType = at_middle_local_error; + +#line 4931 "rlparse.cpp" +} break; +case 139: { +#line 691 "rlparse.kl" + (&redLel->user.action_ref)->action = (&rhs[0]->user.action_ref)->action; + +#line 4937 "rlparse.cpp" +} break; +case 140: { +#line 692 "rlparse.kl" + (&redLel->user.action_ref)->action = (&rhs[1]->user.action_ref)->action; + +#line 4943 "rlparse.cpp" +} break; +case 141: { +#line 693 "rlparse.kl" + (&redLel->user.action_ref)->action = (&rhs[0]->user.action_ref)->action; + +#line 4949 "rlparse.cpp" +} break; +case 142: { +#line 698 "rlparse.kl" + + /* Set the name in the actionDict. */ + Action *action = pd->actionDict.find( (&rhs[0]->user.token)->data ); + if ( action != 0 ) { + /* Pass up the action element */ + (&redLel->user.action_ref)->action = action; + } + else { + /* Will recover by returning null as the action. */ + error((&rhs[0]->user.token)->loc) << "action lookup of \"" << (&rhs[0]->user.token)->data << "\" failed" << endl; + (&redLel->user.action_ref)->action = 0; + } + + +#line 4967 "rlparse.cpp" +} break; +case 143: { +#line 715 "rlparse.kl" + + /* Create the action, add it to the list and pass up. 
*/ + Action *newAction = new Action( (&rhs[0]->user.token)->loc, 0, (&rhs[1]->user.inline_list)->inlineList, pd->nextCondId++ ); + pd->actionList.append( newAction ); + (&redLel->user.action_ref)->action = newAction; + + +#line 4978 "rlparse.cpp" +} break; +case 144: { +#line 730 "rlparse.kl" + + // Lookup/create the priority key. + PriorDictEl *priorDictEl; + if ( pd->priorDict.insert( (&rhs[0]->user.token)->data, pd->nextPriorKey, &priorDictEl ) ) + pd->nextPriorKey += 1; + + // Use the inserted/found priority key. + (&redLel->user.priority_name)->priorityName = priorDictEl->value; + + +#line 4992 "rlparse.cpp" +} break; +case 145: { +#line 747 "rlparse.kl" + + // Convert the priority number to a long. Check for overflow. + errno = 0; + //cerr << "PRIOR AUG: " << $1->token.data << endl; + long aug = strtol( (&rhs[0]->user.token_type)->token.data, 0, 10 ); + if ( errno == ERANGE && aug == LONG_MAX ) { + /* Priority number too large. Recover by setting the priority to 0. */ + error((&rhs[0]->user.token_type)->token.loc) << "priority number " << (&rhs[0]->user.token_type)->token.data << + " overflows" << endl; + (&redLel->user.priority_aug)->priorityNum = 0; + } + else if ( errno == ERANGE && aug == LONG_MIN ) { + /* Priority number too large in the neg. Recover by using 0. */ + error((&rhs[0]->user.token_type)->token.loc) << "priority number " << (&rhs[0]->user.token_type)->token.data << + " underflows" << endl; + (&redLel->user.priority_aug)->priorityNum = 0; + } + else { + /* No overflow or underflow. 
*/ + (&redLel->user.priority_aug)->priorityNum = aug; + } + + +#line 5019 "rlparse.cpp" +} break; +case 146: { +#line 773 "rlparse.kl" + + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + + +#line 5027 "rlparse.cpp" +} break; +case 147: { +#line 777 "rlparse.kl" + + (&redLel->user.token_type)->token.set( "+", 1 ); + (&redLel->user.token_type)->token.loc = (&rhs[0]->user.token)->loc; + (&redLel->user.token_type)->token.append( *(&rhs[1]->user.token) ); + + +#line 5037 "rlparse.cpp" +} break; +case 148: { +#line 783 "rlparse.kl" + + (&redLel->user.token_type)->token.set( "-", 1 ); + (&redLel->user.token_type)->token.loc = (&rhs[0]->user.token)->loc; + (&redLel->user.token_type)->token.append( *(&rhs[1]->user.token) ); + + +#line 5047 "rlparse.cpp" +} break; +case 149: { +#line 795 "rlparse.kl" + + /* Lookup/create the priority key. */ + LocalErrDictEl *localErrDictEl; + if ( pd->localErrDict.insert( (&rhs[0]->user.token)->data, pd->nextLocalErrKey, &localErrDictEl ) ) + pd->nextLocalErrKey += 1; + + /* Use the inserted/found priority key. 
*/ + (&redLel->user.local_err_name)->error_name = localErrDictEl->value; + + +#line 5061 "rlparse.cpp" +} break; +case 150: { +#line 816 "rlparse.kl" + + (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep, + 0, 0, FactorWithRep::StarType ); + + +#line 5070 "rlparse.cpp" +} break; +case 151: { +#line 821 "rlparse.kl" + + (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep, + 0, 0, FactorWithRep::StarStarType ); + + +#line 5079 "rlparse.cpp" +} break; +case 152: { +#line 826 "rlparse.kl" + + (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep, + 0, 0, FactorWithRep::OptionalType ); + + +#line 5088 "rlparse.cpp" +} break; +case 153: { +#line 831 "rlparse.kl" + + (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep, + 0, 0, FactorWithRep::PlusType ); + + +#line 5097 "rlparse.cpp" +} break; +case 154: { +#line 836 "rlparse.kl" + + (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep, + (&rhs[2]->user.factor_rep_num)->rep, 0, FactorWithRep::ExactType ); + + +#line 5106 "rlparse.cpp" +} break; +case 155: { +#line 841 "rlparse.kl" + + (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep, + 0, (&rhs[3]->user.factor_rep_num)->rep, FactorWithRep::MaxType ); + + +#line 5115 "rlparse.cpp" +} break; +case 156: { +#line 846 "rlparse.kl" + + (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep, + (&rhs[2]->user.factor_rep_num)->rep, 0, FactorWithRep::MinType 
); + + +#line 5124 "rlparse.cpp" +} break; +case 157: { +#line 851 "rlparse.kl" + + (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[1]->user.token)->loc, (&rhs[0]->user.factor_with_rep)->factorWithRep, + (&rhs[2]->user.factor_rep_num)->rep, (&rhs[4]->user.factor_rep_num)->rep, FactorWithRep::RangeType ); + + +#line 5133 "rlparse.cpp" +} break; +case 158: { +#line 856 "rlparse.kl" + + (&redLel->user.factor_with_rep)->factorWithRep = new FactorWithRep( (&rhs[0]->user.factor_with_neg)->factorWithNeg ); + + +#line 5141 "rlparse.cpp" +} break; +case 159: { +#line 866 "rlparse.kl" + + // Convert the priority number to a long. Check for overflow. + errno = 0; + long rep = strtol( (&rhs[0]->user.token)->data, 0, 10 ); + if ( errno == ERANGE && rep == LONG_MAX ) { + // Repetition too large. Recover by returing repetition 1. */ + error((&rhs[0]->user.token)->loc) << "repetition number " << (&rhs[0]->user.token)->data << " overflows" << endl; + (&redLel->user.factor_rep_num)->rep = 1; + } + else { + // Cannot be negative, so no overflow. + (&redLel->user.factor_rep_num)->rep = rep; + } + + +#line 5160 "rlparse.cpp" +} break; +case 160: { +#line 892 "rlparse.kl" + + (&redLel->user.factor_with_neg)->factorWithNeg = new FactorWithNeg( (&rhs[0]->user.token)->loc, + (&rhs[1]->user.factor_with_neg)->factorWithNeg, FactorWithNeg::NegateType ); + + +#line 5169 "rlparse.cpp" +} break; +case 161: { +#line 897 "rlparse.kl" + + (&redLel->user.factor_with_neg)->factorWithNeg = new FactorWithNeg( (&rhs[0]->user.token)->loc, + (&rhs[1]->user.factor_with_neg)->factorWithNeg, FactorWithNeg::CharNegateType ); + + +#line 5178 "rlparse.cpp" +} break; +case 162: { +#line 902 "rlparse.kl" + + (&redLel->user.factor_with_neg)->factorWithNeg = new FactorWithNeg( (&rhs[0]->user.factor)->factor ); + + +#line 5186 "rlparse.cpp" +} break; +case 163: { +#line 912 "rlparse.kl" + + /* Create a new factor node going to a concat literal. 
*/ + (&redLel->user.factor)->factor = new Factor( new Literal( *(&rhs[0]->user.token), Literal::LitString ) ); + + +#line 5195 "rlparse.cpp" +} break; +case 164: { +#line 917 "rlparse.kl" + + /* Create a new factor node going to a literal number. */ + (&redLel->user.factor)->factor = new Factor( new Literal( (&rhs[0]->user.token_type)->token, Literal::Number ) ); + + +#line 5204 "rlparse.cpp" +} break; +case 165: { +#line 922 "rlparse.kl" + + /* Find the named graph. */ + GraphDictEl *gdNode = pd->graphDict.find( (&rhs[0]->user.token)->data ); + if ( gdNode == 0 ) { + /* Recover by returning null as the factor node. */ + error((&rhs[0]->user.token)->loc) << "graph lookup of \"" << (&rhs[0]->user.token)->data << "\" failed" << endl; + (&redLel->user.factor)->factor = 0; + } + else if ( gdNode->isInstance ) { + /* Recover by retuning null as the factor node. */ + error((&rhs[0]->user.token)->loc) << "references to graph instantiations not allowed " + "in expressions" << endl; + (&redLel->user.factor)->factor = 0; + } + else { + /* Create a factor node that is a lookup of an expression. */ + (&redLel->user.factor)->factor = new Factor( (&rhs[0]->user.token)->loc, gdNode->value ); + } + + +#line 5228 "rlparse.cpp" +} break; +case 166: { +#line 942 "rlparse.kl" + + /* Create a new factor node going to an OR expression. */ + (&redLel->user.factor)->factor = new Factor( new ReItem( (&rhs[0]->user.token)->loc, (&rhs[1]->user.regular_expr_or_data)->reOrBlock, ReItem::OrBlock ) ); + + +#line 5237 "rlparse.cpp" +} break; +case 167: { +#line 947 "rlparse.kl" + + /* Create a new factor node going to a negated OR expression. 
*/ + (&redLel->user.factor)->factor = new Factor( new ReItem( (&rhs[0]->user.token)->loc, (&rhs[1]->user.regular_expr_or_data)->reOrBlock, ReItem::NegOrBlock ) ); + + +#line 5246 "rlparse.cpp" +} break; +case 168: { +#line 952 "rlparse.kl" + + if ( (&rhs[2]->user.token)->length > 1 ) { + for ( char *p = (&rhs[2]->user.token)->data; *p != 0; p++ ) { + if ( *p == 'i' ) + (&rhs[1]->user.regular_expr)->regExpr->caseInsensitive = true; + } + } + + /* Create a new factor node going to a regular exp. */ + (&redLel->user.factor)->factor = new Factor( (&rhs[1]->user.regular_expr)->regExpr ); + + +#line 5262 "rlparse.cpp" +} break; +case 169: { +#line 964 "rlparse.kl" + + /* Create a new factor node going to a range. */ + (&redLel->user.factor)->factor = new Factor( new Range( (&rhs[0]->user.range_lit)->literal, (&rhs[2]->user.range_lit)->literal ) ); + + +#line 5271 "rlparse.cpp" +} break; +case 170: { +#line 969 "rlparse.kl" + + /* Create a new factor going to a parenthesized join. */ + (&redLel->user.factor)->factor = new Factor( (&rhs[1]->user.join)->join ); + + +#line 5280 "rlparse.cpp" +} break; +case 171: { +#line 981 "rlparse.kl" + + /* Range literas must have only one char. We restrict this in the parse tree. */ + (&redLel->user.range_lit)->literal = new Literal( *(&rhs[0]->user.token), Literal::LitString ); + + +#line 5289 "rlparse.cpp" +} break; +case 172: { +#line 986 "rlparse.kl" + + /* Create a new literal number. 
*/ + (&redLel->user.range_lit)->literal = new Literal( (&rhs[0]->user.token_type)->token, Literal::Number ); + + +#line 5298 "rlparse.cpp" +} break; +case 173: { +#line 995 "rlparse.kl" + + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + + +#line 5306 "rlparse.cpp" +} break; +case 174: { +#line 999 "rlparse.kl" + + (&redLel->user.token_type)->token.set( "-", 1 ); + (&redLel->user.token_type)->token.loc = (&rhs[0]->user.token)->loc; + (&redLel->user.token_type)->token.append( *(&rhs[1]->user.token) ); + + +#line 5316 "rlparse.cpp" +} break; +case 175: { +#line 1005 "rlparse.kl" + + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + + +#line 5324 "rlparse.cpp" +} break; +case 176: { +#line 1020 "rlparse.kl" + + /* An optimization to lessen the tree size. If a non-starred char is + * directly under the left side on the right and the right side is + * another non-starred char then paste them together and return the + * left side. Otherwise just put the two under a new reg exp node. */ + if ( (&rhs[1]->user.regular_expr_item)->reItem->type == ReItem::Data && !(&rhs[1]->user.regular_expr_item)->reItem->star && + (&rhs[0]->user.regular_expr)->regExpr->type == RegExpr::RecurseItem && + (&rhs[0]->user.regular_expr)->regExpr->item->type == ReItem::Data && !(&rhs[0]->user.regular_expr)->regExpr->item->star ) + { + /* Append the right side to the right side of the left and toss the + * right side. */ + (&rhs[0]->user.regular_expr)->regExpr->item->token.append( (&rhs[1]->user.regular_expr_item)->reItem->token ); + delete (&rhs[1]->user.regular_expr_item)->reItem; + (&redLel->user.regular_expr)->regExpr = (&rhs[0]->user.regular_expr)->regExpr; + } + else { + (&redLel->user.regular_expr)->regExpr = new RegExpr( (&rhs[0]->user.regular_expr)->regExpr, (&rhs[1]->user.regular_expr_item)->reItem ); + } + + +#line 5348 "rlparse.cpp" +} break; +case 177: { +#line 1040 "rlparse.kl" + + /* Can't optimize the tree. 
*/ + (&redLel->user.regular_expr)->regExpr = new RegExpr(); + + +#line 5357 "rlparse.cpp" +} break; +case 178: { +#line 1052 "rlparse.kl" + + (&rhs[0]->user.regular_expr_char)->reItem->star = true; + (&redLel->user.regular_expr_item)->reItem = (&rhs[0]->user.regular_expr_char)->reItem; + + +#line 5366 "rlparse.cpp" +} break; +case 179: { +#line 1057 "rlparse.kl" + + (&redLel->user.regular_expr_item)->reItem = (&rhs[0]->user.regular_expr_char)->reItem; + + +#line 5374 "rlparse.cpp" +} break; +case 180: { +#line 1069 "rlparse.kl" + + (&redLel->user.regular_expr_char)->reItem = new ReItem( (&rhs[0]->user.token)->loc, (&rhs[1]->user.regular_expr_or_data)->reOrBlock, ReItem::OrBlock ); + + +#line 5382 "rlparse.cpp" +} break; +case 181: { +#line 1073 "rlparse.kl" + + (&redLel->user.regular_expr_char)->reItem = new ReItem( (&rhs[0]->user.token)->loc, (&rhs[1]->user.regular_expr_or_data)->reOrBlock, ReItem::NegOrBlock ); + + +#line 5390 "rlparse.cpp" +} break; +case 182: { +#line 1077 "rlparse.kl" + + (&redLel->user.regular_expr_char)->reItem = new ReItem( (&rhs[0]->user.token)->loc, ReItem::Dot ); + + +#line 5398 "rlparse.cpp" +} break; +case 183: { +#line 1081 "rlparse.kl" + + (&redLel->user.regular_expr_char)->reItem = new ReItem( (&rhs[0]->user.token)->loc, *(&rhs[0]->user.token) ); + + +#line 5406 "rlparse.cpp" +} break; +case 184: { +#line 1093 "rlparse.kl" + + /* An optimization to lessen the tree size. If an or char is directly + * under the left side on the right and the right side is another or + * char then paste them together and return the left side. Otherwise + * just put the two under a new or data node. */ + if ( (&rhs[1]->user.regular_expr_or_char)->reOrItem->type == ReOrItem::Data && + (&rhs[0]->user.regular_expr_or_data)->reOrBlock->type == ReOrBlock::RecurseItem && + (&rhs[0]->user.regular_expr_or_data)->reOrBlock->item->type == ReOrItem::Data ) + { + /* Append the right side to right side of the left and toss the + * right side. 
*/ + (&rhs[0]->user.regular_expr_or_data)->reOrBlock->item->token.append( (&rhs[1]->user.regular_expr_or_char)->reOrItem->token ); + delete (&rhs[1]->user.regular_expr_or_char)->reOrItem; + (&redLel->user.regular_expr_or_data)->reOrBlock = (&rhs[0]->user.regular_expr_or_data)->reOrBlock; + } + else { + /* Can't optimize, put the left and right under a new node. */ + (&redLel->user.regular_expr_or_data)->reOrBlock = new ReOrBlock( (&rhs[0]->user.regular_expr_or_data)->reOrBlock, (&rhs[1]->user.regular_expr_or_char)->reOrItem ); + } + + +#line 5431 "rlparse.cpp" +} break; +case 185: { +#line 1114 "rlparse.kl" + + (&redLel->user.regular_expr_or_data)->reOrBlock = new ReOrBlock(); + + +#line 5439 "rlparse.cpp" +} break; +case 186: { +#line 1126 "rlparse.kl" + + (&redLel->user.regular_expr_or_char)->reOrItem = new ReOrItem( (&rhs[0]->user.token)->loc, *(&rhs[0]->user.token) ); + + +#line 5447 "rlparse.cpp" +} break; +case 187: { +#line 1130 "rlparse.kl" + + (&redLel->user.regular_expr_or_char)->reOrItem = new ReOrItem( (&rhs[1]->user.token)->loc, (&rhs[0]->user.token)->data[0], (&rhs[2]->user.token)->data[0] ); + + +#line 5455 "rlparse.cpp" +} break; +case 188: { +#line 1147 "rlparse.kl" + + /* Append the item to the list, return the list. */ + (&redLel->user.inline_list)->inlineList = (&rhs[0]->user.inline_list)->inlineList; + (&redLel->user.inline_list)->inlineList->append( (&rhs[1]->user.inline_item)->inlineItem ); + + +#line 5465 "rlparse.cpp" +} break; +case 189: { +#line 1154 "rlparse.kl" + + /* Start with empty list. 
*/ + (&redLel->user.inline_list)->inlineList = new InlineList; + + +#line 5474 "rlparse.cpp" +} break; +case 190: { +#line 1169 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token_type)->token.loc, (&rhs[0]->user.token_type)->token.data, InlineItem::Text ); + + +#line 5482 "rlparse.cpp" +} break; +case 191: { +#line 1175 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token_type)->token.loc, (&rhs[0]->user.token_type)->token.data, InlineItem::Text ); + + +#line 5490 "rlparse.cpp" +} break; +case 192: { +#line 1181 "rlparse.kl" + + /* Pass the inline item up. */ + (&redLel->user.inline_item)->inlineItem = (&rhs[0]->user.inline_item)->inlineItem; + + +#line 5499 "rlparse.cpp" +} break; +case 193: { +#line 1188 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5505 "rlparse.cpp" +} break; +case 194: { +#line 1189 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5511 "rlparse.cpp" +} break; +case 195: { +#line 1190 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5517 "rlparse.cpp" +} break; +case 196: { +#line 1191 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5523 "rlparse.cpp" +} break; +case 197: { +#line 1192 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5529 "rlparse.cpp" +} break; +case 198: { +#line 1193 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5535 "rlparse.cpp" +} break; +case 199: { +#line 1197 "rlparse.kl" + + /* Pass up interpreted items of inline expressions. 
*/ + (&redLel->user.inline_item)->inlineItem = (&rhs[0]->user.inline_item)->inlineItem; + + +#line 5544 "rlparse.cpp" +} break; +case 200: { +#line 1202 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::Hold ); + + +#line 5552 "rlparse.cpp" +} break; +case 201: { +#line 1206 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::Exec ); + (&redLel->user.inline_item)->inlineItem->children = (&rhs[1]->user.inline_list)->inlineList; + + +#line 5561 "rlparse.cpp" +} break; +case 202: { +#line 1211 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, + new NameRef(nameRef), InlineItem::Goto ); + + +#line 5570 "rlparse.cpp" +} break; +case 203: { +#line 1216 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::GotoExpr ); + (&redLel->user.inline_item)->inlineItem->children = (&rhs[2]->user.inline_list)->inlineList; + + +#line 5579 "rlparse.cpp" +} break; +case 204: { +#line 1221 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, new NameRef(nameRef), InlineItem::Next ); + + +#line 5587 "rlparse.cpp" +} break; +case 205: { +#line 1225 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::NextExpr ); + (&redLel->user.inline_item)->inlineItem->children = (&rhs[2]->user.inline_list)->inlineList; + + +#line 5596 "rlparse.cpp" +} break; +case 206: { +#line 1230 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, new NameRef(nameRef), InlineItem::Call ); + + +#line 5604 "rlparse.cpp" +} break; +case 207: { +#line 1234 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::CallExpr ); + 
(&redLel->user.inline_item)->inlineItem->children = (&rhs[2]->user.inline_list)->inlineList; + + +#line 5613 "rlparse.cpp" +} break; +case 208: { +#line 1239 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::Ret ); + + +#line 5621 "rlparse.cpp" +} break; +case 209: { +#line 1243 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::Break ); + + +#line 5629 "rlparse.cpp" +} break; +case 210: { +#line 1251 "rlparse.kl" + + (&redLel->user.inline_list)->inlineList = (&rhs[0]->user.inline_list)->inlineList; + (&redLel->user.inline_list)->inlineList->append( (&rhs[1]->user.inline_item)->inlineItem ); + + +#line 5638 "rlparse.cpp" +} break; +case 211: { +#line 1256 "rlparse.kl" + + /* Init the list used for this expr. */ + (&redLel->user.inline_list)->inlineList = new InlineList; + + +#line 5647 "rlparse.cpp" +} break; +case 212: { +#line 1265 "rlparse.kl" + + /* Return a text segment. */ + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token_type)->token.loc, (&rhs[0]->user.token_type)->token.data, InlineItem::Text ); + + +#line 5656 "rlparse.cpp" +} break; +case 213: { +#line 1271 "rlparse.kl" + + /* Return a text segment, must heap alloc the text. */ + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token_type)->token.loc, (&rhs[0]->user.token_type)->token.data, InlineItem::Text ); + + +#line 5665 "rlparse.cpp" +} break; +case 214: { +#line 1277 "rlparse.kl" + + /* Pass the inline item up. 
*/ + (&redLel->user.inline_item)->inlineItem = (&rhs[0]->user.inline_item)->inlineItem; + + +#line 5674 "rlparse.cpp" +} break; +case 227: { +#line 1307 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::PChar ); + + +#line 5682 "rlparse.cpp" +} break; +case 228: { +#line 1312 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::Char ); + + +#line 5690 "rlparse.cpp" +} break; +case 229: { +#line 1317 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::Curs ); + + +#line 5698 "rlparse.cpp" +} break; +case 230: { +#line 1322 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, InlineItem::Targs ); + + +#line 5706 "rlparse.cpp" +} break; +case 231: { +#line 1327 "rlparse.kl" + + (&redLel->user.inline_item)->inlineItem = new InlineItem( (&rhs[0]->user.token)->loc, + new NameRef(nameRef), InlineItem::Entry ); + + +#line 5715 "rlparse.cpp" +} break; +case 233: { +#line 1338 "rlparse.kl" + + nameRef.empty(); + + +#line 5723 "rlparse.cpp" +} break; +case 235: { +#line 1348 "rlparse.kl" + + /* Insert an initial null pointer val to indicate the existence of the + * initial name seperator. 
*/ + nameRef.setAs( 0 ); + + +#line 5733 "rlparse.cpp" +} break; +case 236: { +#line 1354 "rlparse.kl" + + nameRef.empty(); + + +#line 5741 "rlparse.cpp" +} break; +case 237: { +#line 1361 "rlparse.kl" + + nameRef.append( (&rhs[2]->user.token)->data ); + + +#line 5749 "rlparse.cpp" +} break; +case 238: { +#line 1366 "rlparse.kl" + + nameRef.append( (&rhs[0]->user.token)->data ); + + +#line 5757 "rlparse.cpp" +} break; +} + } +} + + if ( lel->child != 0 ) { + struct Parser_LangEl *first = lel->child; + struct Parser_LangEl *child = lel->child; + numNodes -= 1; + lel->child = 0; + while ( child->next != 0 ) { + child = child->next; + numNodes -= 1; + } + child->next = pool; + pool = first; + } + } + +hit_final: + if ( sp > 0 ) { + /* Figure out which place to return to. */ + if ( cmStack[sp-1]->next == lel ) { + lel = cmStack[--sp]; + goto final_reverse; + } + else { + lel = cmStack[--sp]; + goto final_upwards; + } + } + + lastFinal = lel; + free( cmStack ); + } + } + } + + if ( *action & 0x2 ) { + int fssRed = *action >> 2; + int reduction = Parser_fssProdIdIndex[fssRed]; + struct Parser_LangEl *redLel; + if ( pool == 0 ) { + if ( freshPos == 8128 ) { + freshEl = (struct Parser_LangEl*) malloc( + sizeof(struct Parser_LangEl)*8128); + #ifdef LOG_ACTIONS + cerr << "allocating 8128 LangEls" << endl; + #endif + freshPos = 0; + } + redLel = freshEl + freshPos++; + } + else { + redLel = pool; + pool = pool->next; + } + numNodes += 1; + redLel->type = Parser_prodLhsIds[reduction]; + redLel->reduction = reduction; + redLel->child = 0; + redLel->next = 0; + redLel->retry = (lel->retry << 16); + lel->retry &= 0xffff0000; + + rhsLen = Parser_fssProdLengths[fssRed]; + if ( rhsLen > 0 ) { + int r; + for ( r = rhsLen-1; r > 0; r-- ) { + rhs[r] = stackTop; + stackTop = stackTop->next; + } + rhs[0] = stackTop; + stackTop = stackTop->next; + rhs[0]->next = 0; + } +switch ( reduction ) { +case 215: { +#line 1284 "rlparse.kl" + (&redLel->user.token_type)->token = 
*(&rhs[0]->user.token); + +#line 5839 "rlparse.cpp" +} break; +case 216: { +#line 1285 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5845 "rlparse.cpp" +} break; +case 217: { +#line 1286 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5851 "rlparse.cpp" +} break; +case 218: { +#line 1287 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5857 "rlparse.cpp" +} break; +case 219: { +#line 1288 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5863 "rlparse.cpp" +} break; +case 220: { +#line 1289 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5869 "rlparse.cpp" +} break; +case 221: { +#line 1290 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5875 "rlparse.cpp" +} break; +case 222: { +#line 1297 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5881 "rlparse.cpp" +} break; +case 223: { +#line 1298 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5887 "rlparse.cpp" +} break; +case 224: { +#line 1299 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5893 "rlparse.cpp" +} break; +case 225: { +#line 1300 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5899 "rlparse.cpp" +} break; +case 226: { +#line 1301 "rlparse.kl" + (&redLel->user.token_type)->token = *(&rhs[0]->user.token); + +#line 5905 "rlparse.cpp" +} break; +} + #ifdef LOG_ACTIONS + cerr << "reduced: " + << Parser_prodNames[reduction] + << " rhsLen: " << rhsLen; + #endif + if ( action[1] == 0 ) + redLel->retry = 0; + else { + redLel->retry += 0x10000; + numRetry += 1; + #ifdef LOG_ACTIONS + cerr << " retry: " << redLel; + #endif + } + + #ifdef LOG_ACTIONS + cerr << endl; + #endif + + if ( rhsLen == 0 ) { + redLel->file = lel->file; + redLel->line = lel->line; + 
targState = curs; + } + else { + redLel->child = rhs[rhsLen-1]; + redLel->file = rhs[0]->file; + redLel->line = rhs[0]->line; + targState = rhs[0]->state; + } + + if ( induceReject ) { + #ifdef LOG_ACTIONS + cerr << "error induced during reduction of " << + Parser_lelNames[redLel->type] << endl; + #endif + redLel->state = curs; + redLel->next = stackTop; + stackTop = redLel; + curs = targState; + goto parseError; + } + else { + redLel->next = input; + input = redLel; + } + } + + + curs = targState; + goto again; + +parseError: + #ifdef LOG_BACKTRACK + cerr << "hit error" << endl; + #endif + if ( numRetry > 0 ) { + while ( 1 ) { + struct Parser_LangEl *redLel = stackTop; + if ( stackTop->type < 225 ) { + #ifdef LOG_BACKTRACK + cerr << "backing up over terminal: " << + Parser_lelNames[stackTop->type] << endl; + #endif + stackTop = stackTop->next; + redLel->next = input; + input = redLel; + } + else { + #ifdef LOG_BACKTRACK + cerr << "backing up over non-terminal: " << + Parser_lelNames[stackTop->type] << endl; + #endif + stackTop = stackTop->next; + struct Parser_LangEl *first = redLel->child; + if ( first == 0 ) + rhsLen = 0; + else { + rhsLen = 1; + while ( first->next != 0 ) { + first = first->next; + rhsLen += 1; + } + first->next = stackTop; + stackTop = redLel->child; + + struct Parser_LangEl *rhsEl = stackTop; + int p = rhsLen; + while ( p > 0 ) { + rhs[--p] = rhsEl; + rhsEl = rhsEl->next; + } + } + redLel->next = pool; + pool = redLel; + numNodes -= 1; + } + + if ( redLel->retry > 0 ) { + #ifdef LOG_BACKTRACK + cerr << "found retry targ: " << redLel << endl; + #endif + numRetry -= 1; + #ifdef LOG_BACKTRACK + cerr << "found retry: " << redLel << endl; + #endif + if ( redLel->retry & 0x0000ffff ) + curs = input->state; + else { + input->retry = redLel->retry >> 16; + if ( stackTop->state < 0 ) + curs = Parser_startState; + else { + curs = Parser_targs[(int)Parser_indicies[Parser_offsets[stackTop->state] + (stackTop->type - Parser_keys[stackTop->state<<1])]]; + 
} + } + goto again; + } + } + } + curs = -1; + errCount += 1; +_out: {} +#line 1385 "rlparse.kl" + return errCount == 0 ? 0 : -1; +} + +void Parser::tryMachineDef( InputLoc &loc, char *name, + JoinOrLm *joinOrLm, bool isInstance ) +{ + GraphDictEl *newEl = pd->graphDict.insert( name ); + if ( newEl != 0 ) { + /* New element in the dict, all good. */ + newEl->value = new VarDef( name, joinOrLm ); + newEl->isInstance = isInstance; + newEl->loc = loc; + newEl->value->isExport = exportContext[exportContext.length()-1]; + + /* It it is an instance, put on the instance list. */ + if ( isInstance ) + pd->instanceList.append( newEl ); + } + else { + // Recover by ignoring the duplicate. + error(loc) << "fsm \"" << name << "\" previously defined" << endl; + } +} + +ostream &Parser::parse_error( int tokId, Token &token ) +{ + /* Maintain the error count. */ + gblErrorCount += 1; + + cerr << token.loc.fileName << ":" << token.loc.line << ":" << token.loc.col << ": "; + cerr << "at token "; + if ( tokId < 128 ) + cerr << "\"" << Parser_lelNames[tokId] << "\""; + else + cerr << Parser_lelNames[tokId]; + if ( token.data != 0 ) + cerr << " with data \"" << token.data << "\""; + cerr << ": "; + + return cerr; +} + +int Parser::token( InputLoc &loc, int tokId, char *tokstart, int toklen ) +{ + Token token; + token.data = tokstart; + token.length = toklen; + token.loc = loc; + int res = parseLangEl( tokId, &token ); + if ( res < 0 ) { + parse_error(tokId, token) << "parse error" << endl; + exit(1); + } + return res; +} diff --git a/contrib/tools/ragel5/ragel/rlparse.h b/contrib/tools/ragel5/ragel/rlparse.h new file mode 100644 index 0000000000..957db0fd69 --- /dev/null +++ b/contrib/tools/ragel5/ragel/rlparse.h @@ -0,0 +1,184 @@ +/* Automatically generated by Kelbt from "rlparse.kh". + * + * Parts of this file are copied from Kelbt source covered by the GNU + * GPL. As a special exception, you may use the parts of this file copied + * from Kelbt source without restriction. 
The remainder is derived from + * "rlparse.kh" and inherits the copyright status of that file. + */ + +#line 1 "rlparse.kh" +/* + * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef RLPARSE_H +#define RLPARSE_H + +#include <iostream> +#include "avltree.h" +#include "parsedata.h" + +struct Parser +{ +#line 93 "rlparse.kh" + + + #line 44 "rlparse.h" + struct Parser_LangEl *freshEl; + int freshPos; + struct Parser_LangEl *pool; + int numRetry; + int numNodes; + struct Parser_LangEl *stackTop; + struct Parser_LangEl *lastFinal; + int errCount; + int curs; +#line 96 "rlparse.kh" + + void init(); + int parseLangEl( int type, const Token *token ); + + Parser(const char *fileName, char *sectionName, InputLoc §ionLoc ) + : sectionName(sectionName) + { + pd = new ParseData( fileName, sectionName, sectionLoc ); + exportContext.append( false ); + } + + int token( InputLoc &loc, int tokId, char *tokstart, int toklen ); + void tryMachineDef( InputLoc &loc, char *name, + JoinOrLm *joinOrLm, bool isInstance ); + + /* Report an error encountered by the parser. */ + ostream &parse_error( int tokId, Token &token ); + + ParseData *pd; + + /* The name of the root section, this does not change during an include. 
*/ + char *sectionName; + + NameRef nameRef; + NameRefList nameRefList; + + Vector<bool> exportContext; +}; + +#line 84 "rlparse.h" +#define KW_Machine 128 +#define KW_Include 129 +#define KW_Import 130 +#define KW_Write 131 +#define TK_Word 132 +#define TK_Literal 133 +#define TK_Number 134 +#define TK_Inline 135 +#define TK_Reference 136 +#define TK_ColonEquals 137 +#define TK_EndSection 138 +#define TK_UInt 139 +#define TK_Hex 140 +#define TK_BaseClause 141 +#define TK_DotDot 142 +#define TK_ColonGt 143 +#define TK_ColonGtGt 144 +#define TK_LtColon 145 +#define TK_Arrow 146 +#define TK_DoubleArrow 147 +#define TK_StarStar 148 +#define TK_NameSep 149 +#define TK_BarStar 150 +#define TK_DashDash 151 +#define TK_StartCond 152 +#define TK_AllCond 153 +#define TK_LeavingCond 154 +#define TK_Middle 155 +#define TK_StartGblError 156 +#define TK_AllGblError 157 +#define TK_FinalGblError 158 +#define TK_NotFinalGblError 159 +#define TK_NotStartGblError 160 +#define TK_MiddleGblError 161 +#define TK_StartLocalError 162 +#define TK_AllLocalError 163 +#define TK_FinalLocalError 164 +#define TK_NotFinalLocalError 165 +#define TK_NotStartLocalError 166 +#define TK_MiddleLocalError 167 +#define TK_StartEOF 168 +#define TK_AllEOF 169 +#define TK_FinalEOF 170 +#define TK_NotFinalEOF 171 +#define TK_NotStartEOF 172 +#define TK_MiddleEOF 173 +#define TK_StartToState 174 +#define TK_AllToState 175 +#define TK_FinalToState 176 +#define TK_NotFinalToState 177 +#define TK_NotStartToState 178 +#define TK_MiddleToState 179 +#define TK_StartFromState 180 +#define TK_AllFromState 181 +#define TK_FinalFromState 182 +#define TK_NotFinalFromState 183 +#define TK_NotStartFromState 184 +#define TK_MiddleFromState 185 +#define RE_Slash 186 +#define RE_SqOpen 187 +#define RE_SqOpenNeg 188 +#define RE_SqClose 189 +#define RE_Dot 190 +#define RE_Star 191 +#define RE_Dash 192 +#define RE_Char 193 +#define IL_WhiteSpace 194 +#define IL_Comment 195 +#define IL_Literal 196 +#define IL_Symbol 197 
+#define KW_Action 198 +#define KW_AlphType 199 +#define KW_Range 200 +#define KW_GetKey 201 +#define KW_When 202 +#define KW_Eof 203 +#define KW_Err 204 +#define KW_Lerr 205 +#define KW_To 206 +#define KW_From 207 +#define KW_Export 208 +#define KW_Break 209 +#define KW_Exec 210 +#define KW_Hold 211 +#define KW_PChar 212 +#define KW_Char 213 +#define KW_Goto 214 +#define KW_Call 215 +#define KW_Ret 216 +#define KW_CurState 217 +#define KW_TargState 218 +#define KW_Entry 219 +#define KW_Next 220 +#define KW_Variable 221 +#define KW_Access 222 +#define TK_Semi 223 +#define _eof 224 + +#line 126 "rlparse.kh" + +#endif diff --git a/contrib/tools/ragel5/ragel/rlscan.cpp b/contrib/tools/ragel5/ragel/rlscan.cpp new file mode 100644 index 0000000000..47a7f02148 --- /dev/null +++ b/contrib/tools/ragel5/ragel/rlscan.cpp @@ -0,0 +1,4876 @@ +#line 1 "rlscan.rl" +/* + * Copyright 2006-2007 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <fstream> +#include <string.h> + +#include "ragel.h" +#include "rlscan.h" + +//#define LOG_TOKENS + +using std::ifstream; +using std::istream; +using std::ostream; +using std::cout; +using std::cerr; +using std::endl; + +enum InlineBlockType +{ + CurlyDelimited, + SemiTerminated +}; + + +/* + * The Scanner for Importing + */ + +#define IMP_Word 128 +#define IMP_Literal 129 +#define IMP_UInt 130 +#define IMP_Define 131 + +#line 124 "rlscan.rl" + + + +#line 60 "rlscan.cpp" +static const int inline_token_scan_start = 2; + +static const int inline_token_scan_first_final = 2; + +static const int inline_token_scan_error = -1; + +#line 127 "rlscan.rl" + +void Scanner::flushImport() +{ + int *p = token_data; + int *pe = token_data + cur_token; + + +#line 75 "rlscan.cpp" + { + tok_cs = inline_token_scan_start; + tok_tokstart = 0; + tok_tokend = 0; + tok_act = 0; + } +#line 134 "rlscan.rl" + +#line 84 "rlscan.cpp" + { + if ( p == pe ) + goto _out; + switch ( tok_cs ) + { +tr0: +#line 122 "rlscan.rl" + {{p = (( tok_tokend))-1;}} + goto st2; +tr1: +#line 108 "rlscan.rl" + { tok_tokend = p+1;{ + int base = tok_tokstart - token_data; + int nameOff = 0; + int litOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_Literal, + token_strings[base+litOff], token_lens[base+litOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }{p = (( tok_tokend))-1;}} + goto st2; +tr2: +#line 80 "rlscan.rl" + { tok_tokend = p+1;{ + int base = tok_tokstart - token_data; + int nameOff = 0; + int numOff = 2; + + directToParser( 
inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_UInt, + token_strings[base+numOff], token_lens[base+numOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }{p = (( tok_tokend))-1;}} + goto st2; +tr3: +#line 94 "rlscan.rl" + { tok_tokend = p+1;{ + int base = tok_tokstart - token_data; + int nameOff = 1; + int litOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_Literal, + token_strings[base+litOff], token_lens[base+litOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }{p = (( tok_tokend))-1;}} + goto st2; +tr4: +#line 66 "rlscan.rl" + { tok_tokend = p+1;{ + int base = tok_tokstart - token_data; + int nameOff = 1; + int numOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_UInt, + token_strings[base+numOff], token_lens[base+numOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }{p = (( tok_tokend))-1;}} + goto st2; +tr5: +#line 122 "rlscan.rl" + { tok_tokend = p+1;{p = (( tok_tokend))-1;}} + goto st2; +tr8: +#line 122 "rlscan.rl" + { tok_tokend = p;{p = (( tok_tokend))-1;}} + goto st2; +st2: +#line 1 "rlscan.rl" + { tok_tokstart = 0;} + if ( ++p == pe ) + goto _out2; +case 2: +#line 1 "rlscan.rl" + { tok_tokstart = p;} +#line 170 "rlscan.cpp" + switch( (*p) ) { + case 128: goto tr6; + case 131: goto tr7; + } + goto tr5; +tr6: +#line 1 "rlscan.rl" + { tok_tokend = p+1;} + goto st3; 
+st3: + if ( ++p == pe ) + goto _out3; +case 3: +#line 184 "rlscan.cpp" + if ( (*p) == 61 ) + goto st0; + goto tr8; +st0: + if ( ++p == pe ) + goto _out0; +case 0: + switch( (*p) ) { + case 129: goto tr1; + case 130: goto tr2; + } + goto tr0; +tr7: +#line 1 "rlscan.rl" + { tok_tokend = p+1;} + goto st4; +st4: + if ( ++p == pe ) + goto _out4; +case 4: +#line 205 "rlscan.cpp" + if ( (*p) == 128 ) + goto st1; + goto tr8; +st1: + if ( ++p == pe ) + goto _out1; +case 1: + switch( (*p) ) { + case 129: goto tr3; + case 130: goto tr4; + } + goto tr0; + } + _out2: tok_cs = 2; goto _out; + _out3: tok_cs = 3; goto _out; + _out0: tok_cs = 0; goto _out; + _out4: tok_cs = 4; goto _out; + _out1: tok_cs = 1; goto _out; + + _out: {} + } +#line 135 "rlscan.rl" + + if ( tok_tokstart == 0 ) + cur_token = 0; + else { + cur_token = pe - tok_tokstart; + int ts_offset = tok_tokstart - token_data; + memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) ); + memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) ); + memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) ); + } +} + +void Scanner::directToParser( Parser *toParser, const char *tokFileName, int tokLine, + int tokColumn, int type, char *tokdata, int toklen ) +{ + InputLoc loc; + + #ifdef LOG_TOKENS + cerr << "scanner:" << tokLine << ":" << tokColumn << + ": sending token to the parser " << Parser_lelNames[type]; + cerr << " " << toklen; + if ( tokdata != 0 ) + cerr << " " << tokdata; + cerr << endl; + #endif + + loc.fileName = tokFileName; + loc.line = tokLine; + loc.col = tokColumn; + + toParser->token( loc, type, tokdata, toklen ); +} + +void Scanner::importToken( int token, char *start, char *end ) +{ + if ( cur_token == max_tokens ) + flushImport(); + + token_data[cur_token] = token; + if ( start == 0 ) { + token_strings[cur_token] = 0; + token_lens[cur_token] = 0; + } + else { + int toklen = end-start; + token_lens[cur_token] = toklen; + 
token_strings[cur_token] = new char[toklen+1]; + memcpy( token_strings[cur_token], start, toklen ); + token_strings[cur_token][toklen] = 0; + } + cur_token++; +} + +void Scanner::pass( int token, char *start, char *end ) +{ + if ( importMachines ) + importToken( token, start, end ); + pass(); +} + +void Scanner::pass() +{ + updateCol(); + + /* If no errors and we are at the bottom of the include stack (the + * source file listed on the command line) then write out the data. */ + if ( includeDepth == 0 && machineSpec == 0 && machineName == 0 ) + xmlEscapeHost( output, tokstart, tokend-tokstart ); +} + +/* + * The scanner for processing sections, includes, imports, etc. + */ + + +#line 303 "rlscan.cpp" +static const int section_parse_start = 10; + +static const int section_parse_first_final = 10; + +static const int section_parse_error = 0; + +#line 213 "rlscan.rl" + + + +void Scanner::init( ) +{ + +#line 317 "rlscan.cpp" + { + cs = section_parse_start; + } +#line 219 "rlscan.rl" +} + +bool Scanner::active() +{ + if ( ignoreSection ) + return false; + + if ( parser == 0 && ! parserExistsError ) { + scan_error() << "there is no previous specification name" << endl; + parserExistsError = true; + } + + if ( parser == 0 ) + return false; + + return true; +} + +ostream &Scanner::scan_error() +{ + /* Maintain the error count. 
*/ + gblErrorCount += 1; + cerr << fileName << ":" << line << ":" << column << ": "; + return cerr; +} + +bool Scanner::recursiveInclude(const char *inclFileName, char *inclSectionName ) +{ + for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) { + if ( strcmp( si->fileName, inclFileName ) == 0 && + strcmp( si->sectionName, inclSectionName ) == 0 ) + { + return true; + } + } + return false; +} + +void Scanner::updateCol() +{ + char *from = lastnl; + if ( from == 0 ) + from = tokstart; + //cerr << "adding " << tokend - from << " to column" << endl; + column += tokend - from; + lastnl = 0; +} + +#line 442 "rlscan.rl" + + +void Scanner::token( int type, char c ) +{ + token( type, &c, &c + 1 ); +} + +void Scanner::token( int type ) +{ + token( type, 0, 0 ); +} + +void Scanner::token( int type, char *start, char *end ) +{ + char *tokdata = 0; + int toklen = 0; + if ( start != 0 ) { + toklen = end-start; + tokdata = new char[toklen+1]; + memcpy( tokdata, start, toklen ); + tokdata[toklen] = 0; + } + + processToken( type, tokdata, toklen ); +} + +void Scanner::processToken( int type, char *tokdata, int toklen ) +{ + int *p = &type; + int *pe = &type + 1; + + +#line 403 "rlscan.cpp" + { + if ( p == pe ) + goto _out; + switch ( cs ) + { +tr2: +#line 289 "rlscan.rl" + { + /* Assign a name to the machine. 
*/ + char *machine = word; + + if ( !importMachines && inclSectionTarg == 0 ) { + ignoreSection = false; + + ParserDictEl *pdEl = parserDict.find( machine ); + if ( pdEl == 0 ) { + pdEl = new ParserDictEl( machine ); + pdEl->value = new Parser( fileName, machine, sectionLoc ); + pdEl->value->init(); + parserDict.insert( pdEl ); + } + + parser = pdEl->value; + } + else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) { + /* found include target */ + ignoreSection = false; + parser = inclToParser; + } + else { + /* ignoring section */ + ignoreSection = true; + parser = 0; + } + } + goto st10; +tr6: +#line 323 "rlscan.rl" + { + if ( active() ) { + char *inclSectionName = word; + const char *inclFileName = 0; + + /* Implement defaults for the input file and section name. */ + if ( inclSectionName == 0 ) + inclSectionName = parser->sectionName; + + if ( lit != 0 ) + inclFileName = prepareFileName( lit, lit_len ); + else + inclFileName = fileName; + + /* Check for a recursive include structure. Add the current file/section + * name then check if what we are including is already in the stack. */ + includeStack.append( IncludeStackItem( fileName, parser->sectionName ) ); + + if ( recursiveInclude( inclFileName, inclSectionName ) ) + scan_error() << "include: this is a recursive include operation" << endl; + else { + /* Open the input file for reading. */ + ifstream *inFile = new ifstream( inclFileName ); + if ( ! inFile->is_open() ) { + scan_error() << "include: could not open " << + inclFileName << " for reading" << endl; + } + + Scanner scanner( inclFileName, *inFile, output, parser, + inclSectionName, includeDepth+1, false ); + scanner.do_scan( ); + delete inFile; + } + + /* Remove the last element (len-1) */ + includeStack.remove( -1 ); + } + } + goto st10; +tr10: +#line 372 "rlscan.rl" + { + if ( active() ) { + char *importFileName = prepareFileName( lit, lit_len ); + + /* Open the input file for reading. 
*/ + ifstream *inFile = new ifstream( importFileName ); + if ( ! inFile->is_open() ) { + scan_error() << "import: could not open " << + importFileName << " for reading" << endl; + } + + Scanner scanner( importFileName, *inFile, output, parser, + 0, includeDepth+1, true ); + scanner.do_scan( ); + scanner.importToken( 0, 0, 0 ); + scanner.flushImport(); + delete inFile; + } + } + goto st10; +tr13: +#line 414 "rlscan.rl" + { + if ( active() && machineSpec == 0 && machineName == 0 ) + output << "</write>\n"; + } + goto st10; +tr14: +#line 425 "rlscan.rl" + { + /* Send the token off to the parser. */ + if ( active() ) + directToParser( parser, fileName, line, column, type, tokdata, toklen ); + } + goto st10; +st10: + if ( ++p == pe ) + goto _out10; +case 10: +#line 522 "rlscan.cpp" + switch( (*p) ) { + case 128: goto st1; + case 129: goto st3; + case 130: goto st6; + case 131: goto tr18; + } + goto tr14; +st1: + if ( ++p == pe ) + goto _out1; +case 1: + if ( (*p) == 132 ) + goto tr1; + goto tr0; +tr0: +#line 283 "rlscan.rl" + { scan_error() << "bad machine statement" << endl; } + goto st0; +tr3: +#line 284 "rlscan.rl" + { scan_error() << "bad include statement" << endl; } + goto st0; +tr8: +#line 285 "rlscan.rl" + { scan_error() << "bad import statement" << endl; } + goto st0; +tr11: +#line 286 "rlscan.rl" + { scan_error() << "bad write statement" << endl; } + goto st0; +#line 553 "rlscan.cpp" +st0: + goto _out0; +tr1: +#line 280 "rlscan.rl" + { word = tokdata; word_len = toklen; } + goto st2; +st2: + if ( ++p == pe ) + goto _out2; +case 2: +#line 564 "rlscan.cpp" + if ( (*p) == 59 ) + goto tr2; + goto tr0; +st3: + if ( ++p == pe ) + goto _out3; +case 3: + switch( (*p) ) { + case 132: goto tr4; + case 133: goto tr5; + } + goto tr3; +tr4: +#line 279 "rlscan.rl" + { word = lit = 0; word_len = lit_len = 0; } +#line 280 "rlscan.rl" + { word = tokdata; word_len = toklen; } + goto st4; +st4: + if ( ++p == pe ) + goto _out4; +case 4: +#line 587 "rlscan.cpp" + switch( (*p) ) { 
+ case 59: goto tr6; + case 133: goto tr7; + } + goto tr3; +tr5: +#line 279 "rlscan.rl" + { word = lit = 0; word_len = lit_len = 0; } +#line 281 "rlscan.rl" + { lit = tokdata; lit_len = toklen; } + goto st5; +tr7: +#line 281 "rlscan.rl" + { lit = tokdata; lit_len = toklen; } + goto st5; +st5: + if ( ++p == pe ) + goto _out5; +case 5: +#line 607 "rlscan.cpp" + if ( (*p) == 59 ) + goto tr6; + goto tr3; +st6: + if ( ++p == pe ) + goto _out6; +case 6: + if ( (*p) == 133 ) + goto tr9; + goto tr8; +tr9: +#line 281 "rlscan.rl" + { lit = tokdata; lit_len = toklen; } + goto st7; +st7: + if ( ++p == pe ) + goto _out7; +case 7: +#line 626 "rlscan.cpp" + if ( (*p) == 59 ) + goto tr10; + goto tr8; +tr18: +#line 397 "rlscan.rl" + { + if ( active() && machineSpec == 0 && machineName == 0 ) { + output << "<write" + " def_name=\"" << parser->sectionName << "\"" + " line=\"" << line << "\"" + " col=\"" << column << "\"" + ">"; + } + } + goto st8; +st8: + if ( ++p == pe ) + goto _out8; +case 8: +#line 646 "rlscan.cpp" + if ( (*p) == 132 ) + goto tr12; + goto tr11; +tr12: +#line 408 "rlscan.rl" + { + if ( active() && machineSpec == 0 && machineName == 0 ) + output << "<arg>" << tokdata << "</arg>"; + } + goto st9; +st9: + if ( ++p == pe ) + goto _out9; +case 9: +#line 661 "rlscan.cpp" + switch( (*p) ) { + case 59: goto tr13; + case 132: goto tr12; + } + goto tr11; + } + _out10: cs = 10; goto _out; + _out1: cs = 1; goto _out; + _out0: cs = 0; goto _out; + _out2: cs = 2; goto _out; + _out3: cs = 3; goto _out; + _out4: cs = 4; goto _out; + _out5: cs = 5; goto _out; + _out6: cs = 6; goto _out; + _out7: cs = 7; goto _out; + _out8: cs = 8; goto _out; + _out9: cs = 9; goto _out; + + _out: {} + } +#line 476 "rlscan.rl" + + + updateCol(); + + /* Record the last token for use in controlling the scan of subsequent + * tokens. 
*/ + lastToken = type; +} + +void Scanner::startSection( ) +{ + parserExistsError = false; + + if ( includeDepth == 0 ) { + if ( machineSpec == 0 && machineName == 0 ) + output << "</host>\n"; + } + + sectionLoc.fileName = fileName; + sectionLoc.line = line; + sectionLoc.col = 0; +} + +void Scanner::endSection( ) +{ + /* Execute the eof actions for the section parser. */ + +#line 710 "rlscan.cpp" + { + switch ( cs ) { + case 1: + case 2: +#line 283 "rlscan.rl" + { scan_error() << "bad machine statement" << endl; } + break; + case 3: + case 4: + case 5: +#line 284 "rlscan.rl" + { scan_error() << "bad include statement" << endl; } + break; + case 6: + case 7: +#line 285 "rlscan.rl" + { scan_error() << "bad import statement" << endl; } + break; + case 8: + case 9: +#line 286 "rlscan.rl" + { scan_error() << "bad write statement" << endl; } + break; +#line 734 "rlscan.cpp" + } + } + +#line 505 "rlscan.rl" + + + /* Close off the section with the parser. */ + if ( active() ) { + InputLoc loc; + loc.fileName = fileName; + loc.line = line; + loc.col = 0; + + parser->token( loc, TK_EndSection, 0, 0 ); + } + + if ( includeDepth == 0 ) { + if ( machineSpec == 0 && machineName == 0 ) { + /* The end section may include a newline on the end, so + * we use the last line, which will count the newline. */ + output << "<host line=\"" << line << "\">"; + } + } +} + +#line 917 "rlscan.rl" + + + +#line 764 "rlscan.cpp" +static const int rlscan_start = 23; + +static const int rlscan_first_final = 23; + +static const int rlscan_error = 0; + +#line 920 "rlscan.rl" + +void Scanner::do_scan() +{ + int bufsize = 8; + char *buf = new char[bufsize]; + const char last_char = 0; + int cs, act, have = 0; + int top, stack[1]; + int curly_count = 0; + bool execute = true; + bool singleLineSpec = false; + InlineBlockType inlineBlockType = CurlyDelimited; + + /* Init the section parser and the character scanner. 
*/ + init(); + +#line 788 "rlscan.cpp" + { + cs = rlscan_start; + top = 0; + tokstart = 0; + tokend = 0; + act = 0; + } +#line 936 "rlscan.rl" + + while ( execute ) { + char *p = buf + have; + int space = bufsize - have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. Grow it. */ + bufsize = bufsize * 2; + char *newbuf = new char[bufsize]; + + /* Recompute p and space. */ + p = newbuf + have; + space = bufsize - have; + + /* Patch up pointers possibly in use. */ + if ( tokstart != 0 ) + tokstart = newbuf + ( tokstart - buf ); + tokend = newbuf + ( tokend - buf ); + + /* Copy the new buffer in. */ + memcpy( newbuf, buf, have ); + delete[] buf; + buf = newbuf; + } + + input.read( p, space ); + int len = input.gcount(); + + /* If we see eof then append the EOF char. */ + if ( len == 0 ) { + p[0] = last_char, len = 1; + execute = false; + } + + char *pe = p + len; + +#line 833 "rlscan.cpp" + { + if ( p == pe ) + goto _out; + goto _resume; + +_again: + switch ( cs ) { + case 23: goto st23; + case 24: goto st24; + case 25: goto st25; + case 1: goto st1; + case 2: goto st2; + case 26: goto st26; + case 27: goto st27; + case 28: goto st28; + case 3: goto st3; + case 4: goto st4; + case 29: goto st29; + case 5: goto st5; + case 6: goto st6; + case 7: goto st7; + case 30: goto st30; + case 31: goto st31; + case 32: goto st32; + case 33: goto st33; + case 34: goto st34; + case 35: goto st35; + case 36: goto st36; + case 37: goto st37; + case 38: goto st38; + case 39: goto st39; + case 8: goto st8; + case 9: goto st9; + case 40: goto st40; + case 10: goto st10; + case 11: goto st11; + case 41: goto st41; + case 12: goto st12; + case 13: goto st13; + case 14: goto st14; + case 42: goto st42; + case 43: goto st43; + case 15: goto st15; + case 44: goto st44; + case 45: goto st45; + case 46: goto st46; + case 47: goto st47; + case 48: goto st48; + case 49: goto st49; + case 50: goto st50; + case 51: goto st51; + case 52: goto st52; + case 53: goto 
st53; + case 54: goto st54; + case 55: goto st55; + case 56: goto st56; + case 57: goto st57; + case 58: goto st58; + case 59: goto st59; + case 60: goto st60; + case 61: goto st61; + case 62: goto st62; + case 63: goto st63; + case 64: goto st64; + case 65: goto st65; + case 66: goto st66; + case 67: goto st67; + case 68: goto st68; + case 69: goto st69; + case 70: goto st70; + case 71: goto st71; + case 72: goto st72; + case 73: goto st73; + case 74: goto st74; + case 75: goto st75; + case 76: goto st76; + case 77: goto st77; + case 78: goto st78; + case 79: goto st79; + case 80: goto st80; + case 81: goto st81; + case 82: goto st82; + case 83: goto st83; + case 84: goto st84; + case 85: goto st85; + case 0: goto st0; + case 86: goto st86; + case 87: goto st87; + case 88: goto st88; + case 89: goto st89; + case 90: goto st90; + case 16: goto st16; + case 91: goto st91; + case 17: goto st17; + case 92: goto st92; + case 18: goto st18; + case 93: goto st93; + case 94: goto st94; + case 95: goto st95; + case 19: goto st19; + case 20: goto st20; + case 96: goto st96; + case 97: goto st97; + case 98: goto st98; + case 99: goto st99; + case 100: goto st100; + case 21: goto st21; + case 101: goto st101; + case 102: goto st102; + case 103: goto st103; + case 104: goto st104; + case 105: goto st105; + case 106: goto st106; + case 107: goto st107; + case 108: goto st108; + case 109: goto st109; + case 110: goto st110; + case 111: goto st111; + case 112: goto st112; + case 113: goto st113; + case 114: goto st114; + case 115: goto st115; + case 116: goto st116; + case 117: goto st117; + case 118: goto st118; + case 119: goto st119; + case 120: goto st120; + case 121: goto st121; + case 122: goto st122; + case 123: goto st123; + case 124: goto st124; + case 125: goto st125; + case 126: goto st126; + case 127: goto st127; + case 128: goto st128; + case 129: goto st129; + case 130: goto st130; + case 131: goto st131; + case 132: goto st132; + case 133: goto st133; + case 134: 
goto st134; + case 135: goto st135; + case 136: goto st136; + case 137: goto st137; + case 138: goto st138; + case 139: goto st139; + case 140: goto st140; + case 141: goto st141; + case 142: goto st142; + case 143: goto st143; + case 144: goto st144; + case 145: goto st145; + case 146: goto st146; + case 147: goto st147; + case 148: goto st148; + case 149: goto st149; + case 150: goto st150; + case 151: goto st151; + case 152: goto st152; + case 153: goto st153; + case 154: goto st154; + case 155: goto st155; + case 156: goto st156; + case 157: goto st157; + case 158: goto st158; + case 159: goto st159; + case 160: goto st160; + case 161: goto st161; + case 162: goto st162; + case 163: goto st163; + case 164: goto st164; + case 165: goto st165; + case 166: goto st166; + case 167: goto st167; + case 168: goto st168; + case 169: goto st169; + case 170: goto st170; + case 171: goto st171; + case 172: goto st172; + case 173: goto st173; + case 174: goto st174; + case 22: goto st22; + default: break; + } + + if ( ++p == pe ) + goto _out; +_resume: + switch ( cs ) + { +tr2: +#line 899 "rlscan.rl" + {tokend = p+1;{ pass( IMP_Literal, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st23; +tr10: +#line 898 "rlscan.rl" + {tokend = p+1;{ pass(); }{p = ((tokend))-1;}} + goto st23; +tr12: +#line 532 "rlscan.rl" + { + lastnl = p; + column = 0; + line++; + } +#line 898 "rlscan.rl" + {tokend = p+1;{ pass(); }{p = ((tokend))-1;}} + goto st23; +tr41: +#line 915 "rlscan.rl" + {tokend = p+1;{ pass( *tokstart, 0, 0 ); }{p = ((tokend))-1;}} + goto st23; +tr42: +#line 914 "rlscan.rl" + {tokend = p+1;{p = ((tokend))-1;}} + goto st23; +tr52: +#line 913 "rlscan.rl" + {tokend = p;{ pass(); }{p = ((tokend))-1;}} + goto st23; +tr53: +#line 915 "rlscan.rl" + {tokend = p;{ pass( *tokstart, 0, 0 ); }{p = ((tokend))-1;}} + goto st23; +tr55: +#line 907 "rlscan.rl" + {tokend = p;{ + updateCol(); + singleLineSpec = true; + startSection(); + {{p = ((tokend))-1;}{goto st88;}} + }{p = ((tokend))-1;}} 
+ goto st23; +tr56: +#line 901 "rlscan.rl" + {tokend = p+1;{ + updateCol(); + singleLineSpec = false; + startSection(); + {{p = ((tokend))-1;}{goto st88;}} + }{p = ((tokend))-1;}} + goto st23; +tr57: +#line 897 "rlscan.rl" + {tokend = p;{ pass( IMP_UInt, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st23; +tr58: +#line 1 "rlscan.rl" + { switch( act ) { + case 137: + { pass( IMP_Define, 0, 0 ); } + break; + case 138: + { pass( IMP_Word, tokstart, tokend ); } + break; + default: break; + } + {p = ((tokend))-1;}} + goto st23; +tr59: +#line 896 "rlscan.rl" + {tokend = p;{ pass( IMP_Word, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st23; +st23: +#line 1 "rlscan.rl" + {tokstart = 0;} + if ( ++p == pe ) + goto _out23; +case 23: +#line 1 "rlscan.rl" + {tokstart = p;} +#line 1105 "rlscan.cpp" + switch( (*p) ) { + case 0: goto tr42; + case 9: goto st24; + case 10: goto tr44; + case 32: goto st24; + case 34: goto tr45; + case 37: goto st26; + case 39: goto tr47; + case 47: goto tr48; + case 95: goto tr50; + case 100: goto st32; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st30; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr50; + } else + goto tr50; + goto tr41; +tr44: +#line 532 "rlscan.rl" + { + lastnl = p; + column = 0; + line++; + } + goto st24; +st24: + if ( ++p == pe ) + goto _out24; +case 24: +#line 1139 "rlscan.cpp" + switch( (*p) ) { + case 9: goto st24; + case 10: goto tr44; + case 32: goto st24; + } + goto tr52; +tr45: +#line 1 "rlscan.rl" + {tokend = p+1;} + goto st25; +st25: + if ( ++p == pe ) + goto _out25; +case 25: +#line 1154 "rlscan.cpp" + switch( (*p) ) { + case 10: goto tr1; + case 34: goto tr2; + case 92: goto st2; + } + goto st1; +tr1: +#line 532 "rlscan.rl" + { + lastnl = p; + column = 0; + line++; + } + goto st1; +st1: + if ( ++p == pe ) + goto _out1; +case 1: +#line 1173 "rlscan.cpp" + switch( (*p) ) { + case 10: goto tr1; + case 34: goto tr2; + case 92: goto st2; + } + goto st1; +st2: + if ( ++p 
== pe ) + goto _out2; +case 2: + if ( (*p) == 10 ) + goto tr1; + goto st1; +st26: + if ( ++p == pe ) + goto _out26; +case 26: + if ( (*p) == 37 ) + goto st27; + goto tr53; +st27: + if ( ++p == pe ) + goto _out27; +case 27: + if ( (*p) == 123 ) + goto tr56; + goto tr55; +tr47: +#line 1 "rlscan.rl" + {tokend = p+1;} + goto st28; +st28: + if ( ++p == pe ) + goto _out28; +case 28: +#line 1209 "rlscan.cpp" + switch( (*p) ) { + case 10: goto tr5; + case 39: goto tr2; + case 92: goto st4; + } + goto st3; +tr5: +#line 532 "rlscan.rl" + { + lastnl = p; + column = 0; + line++; + } + goto st3; +st3: + if ( ++p == pe ) + goto _out3; +case 3: +#line 1228 "rlscan.cpp" + switch( (*p) ) { + case 10: goto tr5; + case 39: goto tr2; + case 92: goto st4; + } + goto st3; +st4: + if ( ++p == pe ) + goto _out4; +case 4: + if ( (*p) == 10 ) + goto tr5; + goto st3; +tr48: +#line 1 "rlscan.rl" + {tokend = p+1;} + goto st29; +st29: + if ( ++p == pe ) + goto _out29; +case 29: +#line 1250 "rlscan.cpp" + switch( (*p) ) { + case 42: goto st5; + case 47: goto st7; + } + goto tr53; +tr8: +#line 532 "rlscan.rl" + { + lastnl = p; + column = 0; + line++; + } + goto st5; +st5: + if ( ++p == pe ) + goto _out5; +case 5: +#line 1268 "rlscan.cpp" + switch( (*p) ) { + case 10: goto tr8; + case 42: goto st6; + } + goto st5; +st6: + if ( ++p == pe ) + goto _out6; +case 6: + switch( (*p) ) { + case 10: goto tr8; + case 42: goto st6; + case 47: goto tr10; + } + goto st5; +st7: + if ( ++p == pe ) + goto _out7; +case 7: + if ( (*p) == 10 ) + goto tr12; + goto st7; +st30: + if ( ++p == pe ) + goto _out30; +case 30: + if ( 48 <= (*p) && (*p) <= 57 ) + goto st30; + goto tr57; +tr50: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 896 "rlscan.rl" + {act = 138;} + goto st31; +tr64: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 895 "rlscan.rl" + {act = 137;} + goto st31; +st31: + if ( ++p == pe ) + goto _out31; +case 31: +#line 1314 "rlscan.cpp" + if ( (*p) == 95 ) + goto tr50; + if ( (*p) < 65 ) { + if ( 48 <= (*p) && 
(*p) <= 57 ) + goto tr50; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr50; + } else + goto tr50; + goto tr58; +st32: + if ( ++p == pe ) + goto _out32; +case 32: + switch( (*p) ) { + case 95: goto tr50; + case 101: goto st33; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr50; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr50; + } else + goto tr50; + goto tr59; +st33: + if ( ++p == pe ) + goto _out33; +case 33: + switch( (*p) ) { + case 95: goto tr50; + case 102: goto st34; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr50; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr50; + } else + goto tr50; + goto tr59; +st34: + if ( ++p == pe ) + goto _out34; +case 34: + switch( (*p) ) { + case 95: goto tr50; + case 105: goto st35; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr50; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr50; + } else + goto tr50; + goto tr59; +st35: + if ( ++p == pe ) + goto _out35; +case 35: + switch( (*p) ) { + case 95: goto tr50; + case 110: goto st36; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr50; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr50; + } else + goto tr50; + goto tr59; +st36: + if ( ++p == pe ) + goto _out36; +case 36: + switch( (*p) ) { + case 95: goto tr50; + case 101: goto tr64; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr50; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr50; + } else + goto tr50; + goto tr59; +tr15: +#line 606 "rlscan.rl" + {tokend = p+1;{ token( IL_Literal, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st37; +tr23: +#line 612 "rlscan.rl" + {tokend = p+1;{ token( IL_Comment, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st37; +tr25: +#line 532 "rlscan.rl" + { + lastnl = p; + column = 0; + line++; + } +#line 612 "rlscan.rl" + {tokend = p+1;{ token( 
IL_Comment, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st37; +tr26: +#line 602 "rlscan.rl" + {{ token( TK_UInt, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st37; +tr65: +#line 659 "rlscan.rl" + {tokend = p+1;{ token( IL_Symbol, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st37; +tr66: +#line 654 "rlscan.rl" + {tokend = p+1;{ + scan_error() << "unterminated code block" << endl; + }{p = ((tokend))-1;}} + goto st37; +tr71: +#line 634 "rlscan.rl" + {tokend = p+1;{ token( *tokstart, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st37; +tr72: +#line 629 "rlscan.rl" + {tokend = p+1;{ + whitespaceOn = true; + token( *tokstart, tokstart, tokend ); + }{p = ((tokend))-1;}} + goto st37; +tr77: +#line 622 "rlscan.rl" + {tokend = p+1;{ + whitespaceOn = true; + token( *tokstart, tokstart, tokend ); + if ( inlineBlockType == SemiTerminated ) + {{p = ((tokend))-1;}{goto st88;}} + }{p = ((tokend))-1;}} + goto st37; +tr80: +#line 636 "rlscan.rl" + {tokend = p+1;{ + token( IL_Symbol, tokstart, tokend ); + curly_count += 1; + }{p = ((tokend))-1;}} + goto st37; +tr81: +#line 641 "rlscan.rl" + {tokend = p+1;{ + if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) { + /* Inline code block ends. */ + token( '}' ); + {{p = ((tokend))-1;}{goto st88;}} + } + else { + /* Either a semi terminated inline block or only the closing + * brace of some inner scope, not the block's closing brace. 
*/ + token( IL_Symbol, tokstart, tokend ); + } + }{p = ((tokend))-1;}} + goto st37; +tr82: +#line 608 "rlscan.rl" + {tokend = p;{ + if ( whitespaceOn ) + token( IL_WhiteSpace, tokstart, tokend ); + }{p = ((tokend))-1;}} + goto st37; +tr83: +#line 659 "rlscan.rl" + {tokend = p;{ token( IL_Symbol, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st37; +tr84: +#line 602 "rlscan.rl" + {tokend = p;{ token( TK_UInt, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st37; +tr86: +#line 603 "rlscan.rl" + {tokend = p;{ token( TK_Hex, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st37; +tr87: +#line 614 "rlscan.rl" + {tokend = p+1;{ token( TK_NameSep, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st37; +tr88: +#line 1 "rlscan.rl" + { switch( act ) { + case 1: + { token( KW_PChar ); } + break; + case 3: + { token( KW_CurState ); } + break; + case 4: + { token( KW_TargState ); } + break; + case 5: + { + whitespaceOn = false; + token( KW_Entry ); + } + break; + case 6: + { + whitespaceOn = false; + token( KW_Hold ); + } + break; + case 7: + { token( KW_Exec, 0, 0 ); } + break; + case 8: + { + whitespaceOn = false; + token( KW_Goto ); + } + break; + case 9: + { + whitespaceOn = false; + token( KW_Next ); + } + break; + case 10: + { + whitespaceOn = false; + token( KW_Call ); + } + break; + case 11: + { + whitespaceOn = false; + token( KW_Ret ); + } + break; + case 12: + { + whitespaceOn = false; + token( KW_Break ); + } + break; + case 13: + { token( TK_Word, tokstart, tokend ); } + break; + default: break; + } + {p = ((tokend))-1;}} + goto st37; +tr89: +#line 600 "rlscan.rl" + {tokend = p;{ token( TK_Word, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st37; +tr103: +#line 565 "rlscan.rl" + {tokend = p;{ token( KW_Char ); }{p = ((tokend))-1;}} + goto st37; +st37: +#line 1 "rlscan.rl" + {tokstart = 0;} + if ( ++p == pe ) + goto _out37; +case 37: +#line 1 "rlscan.rl" + {tokstart = p;} +#line 1588 "rlscan.cpp" + switch( (*p) ) { + case 0: goto tr66; + case 9: goto st38; + 
case 10: goto tr68; + case 32: goto st38; + case 34: goto tr69; + case 39: goto tr70; + case 40: goto tr71; + case 44: goto tr71; + case 47: goto tr73; + case 48: goto tr74; + case 58: goto st45; + case 59: goto tr77; + case 95: goto tr78; + case 102: goto st47; + case 123: goto tr80; + case 125: goto tr81; + } + if ( (*p) < 49 ) { + if ( 41 <= (*p) && (*p) <= 42 ) + goto tr72; + } else if ( (*p) > 57 ) { + if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else if ( (*p) >= 65 ) + goto tr78; + } else + goto st43; + goto tr65; +tr68: +#line 532 "rlscan.rl" + { + lastnl = p; + column = 0; + line++; + } + goto st38; +st38: + if ( ++p == pe ) + goto _out38; +case 38: +#line 1631 "rlscan.cpp" + switch( (*p) ) { + case 9: goto st38; + case 10: goto tr68; + case 32: goto st38; + } + goto tr82; +tr69: +#line 1 "rlscan.rl" + {tokend = p+1;} + goto st39; +st39: + if ( ++p == pe ) + goto _out39; +case 39: +#line 1646 "rlscan.cpp" + switch( (*p) ) { + case 10: goto tr14; + case 34: goto tr15; + case 92: goto st9; + } + goto st8; +tr14: +#line 532 "rlscan.rl" + { + lastnl = p; + column = 0; + line++; + } + goto st8; +st8: + if ( ++p == pe ) + goto _out8; +case 8: +#line 1665 "rlscan.cpp" + switch( (*p) ) { + case 10: goto tr14; + case 34: goto tr15; + case 92: goto st9; + } + goto st8; +st9: + if ( ++p == pe ) + goto _out9; +case 9: + if ( (*p) == 10 ) + goto tr14; + goto st8; +tr70: +#line 1 "rlscan.rl" + {tokend = p+1;} + goto st40; +st40: + if ( ++p == pe ) + goto _out40; +case 40: +#line 1687 "rlscan.cpp" + switch( (*p) ) { + case 10: goto tr18; + case 39: goto tr15; + case 92: goto st11; + } + goto st10; +tr18: +#line 532 "rlscan.rl" + { + lastnl = p; + column = 0; + line++; + } + goto st10; +st10: + if ( ++p == pe ) + goto _out10; +case 10: +#line 1706 "rlscan.cpp" + switch( (*p) ) { + case 10: goto tr18; + case 39: goto tr15; + case 92: goto st11; + } + goto st10; +st11: + if ( ++p == pe ) + goto _out11; +case 11: + if ( (*p) == 10 ) + goto tr18; + 
goto st10; +tr73: +#line 1 "rlscan.rl" + {tokend = p+1;} + goto st41; +st41: + if ( ++p == pe ) + goto _out41; +case 41: +#line 1728 "rlscan.cpp" + switch( (*p) ) { + case 42: goto st12; + case 47: goto st14; + } + goto tr83; +tr21: +#line 532 "rlscan.rl" + { + lastnl = p; + column = 0; + line++; + } + goto st12; +st12: + if ( ++p == pe ) + goto _out12; +case 12: +#line 1746 "rlscan.cpp" + switch( (*p) ) { + case 10: goto tr21; + case 42: goto st13; + } + goto st12; +st13: + if ( ++p == pe ) + goto _out13; +case 13: + switch( (*p) ) { + case 10: goto tr21; + case 42: goto st13; + case 47: goto tr23; + } + goto st12; +st14: + if ( ++p == pe ) + goto _out14; +case 14: + if ( (*p) == 10 ) + goto tr25; + goto st14; +tr74: +#line 1 "rlscan.rl" + {tokend = p+1;} + goto st42; +st42: + if ( ++p == pe ) + goto _out42; +case 42: +#line 1777 "rlscan.cpp" + if ( (*p) == 120 ) + goto st15; + if ( 48 <= (*p) && (*p) <= 57 ) + goto st43; + goto tr84; +st43: + if ( ++p == pe ) + goto _out43; +case 43: + if ( 48 <= (*p) && (*p) <= 57 ) + goto st43; + goto tr84; +st15: + if ( ++p == pe ) + goto _out15; +case 15: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st44; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st44; + } else + goto st44; + goto tr26; +st44: + if ( ++p == pe ) + goto _out44; +case 44: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st44; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st44; + } else + goto st44; + goto tr86; +st45: + if ( ++p == pe ) + goto _out45; +case 45: + if ( (*p) == 58 ) + goto tr87; + goto tr83; +tr78: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 600 "rlscan.rl" + {act = 13;} + goto st46; +tr102: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 595 "rlscan.rl" + {act = 12;} + goto st46; +tr107: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 587 "rlscan.rl" + {act = 10;} + goto st46; +tr109: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 566 "rlscan.rl" + {act = 3;} + goto 
st46; +tr114: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 568 "rlscan.rl" + {act = 5;} + goto st46; +tr116: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 578 "rlscan.rl" + {act = 7;} + goto st46; +tr119: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 579 "rlscan.rl" + {act = 8;} + goto st46; +tr122: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 574 "rlscan.rl" + {act = 6;} + goto st46; +tr125: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 583 "rlscan.rl" + {act = 9;} + goto st46; +tr126: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 564 "rlscan.rl" + {act = 1;} + goto st46; +tr128: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 591 "rlscan.rl" + {act = 11;} + goto st46; +tr132: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 567 "rlscan.rl" + {act = 4;} + goto st46; +st46: + if ( ++p == pe ) + goto _out46; +case 46: +#line 1899 "rlscan.cpp" + if ( (*p) == 95 ) + goto tr78; + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr88; +st47: + if ( ++p == pe ) + goto _out47; +case 47: + switch( (*p) ) { + case 95: goto tr78; + case 98: goto st48; + case 99: goto st52; + case 101: goto st57; + case 103: goto st63; + case 104: goto st66; + case 110: goto st69; + case 112: goto st72; + case 114: goto st73; + case 116: goto st75; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st48: + if ( ++p == pe ) + goto _out48; +case 48: + switch( (*p) ) { + case 95: goto tr78; + case 114: goto st49; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st49: + if ( ++p == pe ) + goto _out49; +case 49: + switch( (*p) ) { + case 95: goto tr78; + case 101: goto st50; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) 
&& (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st50: + if ( ++p == pe ) + goto _out50; +case 50: + switch( (*p) ) { + case 95: goto tr78; + case 97: goto st51; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 98 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st51: + if ( ++p == pe ) + goto _out51; +case 51: + switch( (*p) ) { + case 95: goto tr78; + case 107: goto tr102; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st52: + if ( ++p == pe ) + goto _out52; +case 52: + switch( (*p) ) { + case 95: goto tr78; + case 97: goto st53; + case 117: goto st55; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 98 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr103; +st53: + if ( ++p == pe ) + goto _out53; +case 53: + switch( (*p) ) { + case 95: goto tr78; + case 108: goto st54; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st54: + if ( ++p == pe ) + goto _out54; +case 54: + switch( (*p) ) { + case 95: goto tr78; + case 108: goto tr107; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st55: + if ( ++p == pe ) + goto _out55; +case 55: + switch( (*p) ) { + case 95: goto tr78; + case 114: goto st56; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st56: + if ( ++p == pe ) + goto _out56; +case 
56: + switch( (*p) ) { + case 95: goto tr78; + case 115: goto tr109; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st57: + if ( ++p == pe ) + goto _out57; +case 57: + switch( (*p) ) { + case 95: goto tr78; + case 110: goto st58; + case 120: goto st61; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st58: + if ( ++p == pe ) + goto _out58; +case 58: + switch( (*p) ) { + case 95: goto tr78; + case 116: goto st59; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st59: + if ( ++p == pe ) + goto _out59; +case 59: + switch( (*p) ) { + case 95: goto tr78; + case 114: goto st60; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st60: + if ( ++p == pe ) + goto _out60; +case 60: + switch( (*p) ) { + case 95: goto tr78; + case 121: goto tr114; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st61: + if ( ++p == pe ) + goto _out61; +case 61: + switch( (*p) ) { + case 95: goto tr78; + case 101: goto st62; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st62: + if ( ++p == pe ) + goto _out62; +case 62: + switch( (*p) ) { + case 95: goto tr78; + case 99: goto tr116; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) 
&& (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st63: + if ( ++p == pe ) + goto _out63; +case 63: + switch( (*p) ) { + case 95: goto tr78; + case 111: goto st64; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st64: + if ( ++p == pe ) + goto _out64; +case 64: + switch( (*p) ) { + case 95: goto tr78; + case 116: goto st65; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st65: + if ( ++p == pe ) + goto _out65; +case 65: + switch( (*p) ) { + case 95: goto tr78; + case 111: goto tr119; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st66: + if ( ++p == pe ) + goto _out66; +case 66: + switch( (*p) ) { + case 95: goto tr78; + case 111: goto st67; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st67: + if ( ++p == pe ) + goto _out67; +case 67: + switch( (*p) ) { + case 95: goto tr78; + case 108: goto st68; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st68: + if ( ++p == pe ) + goto _out68; +case 68: + switch( (*p) ) { + case 95: goto tr78; + case 100: goto tr122; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st69: + if ( ++p == pe ) + goto _out69; +case 69: + switch( (*p) ) { + case 95: goto tr78; + case 101: goto st70; + } + if ( (*p) < 65 ) { + if 
( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st70: + if ( ++p == pe ) + goto _out70; +case 70: + switch( (*p) ) { + case 95: goto tr78; + case 120: goto st71; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st71: + if ( ++p == pe ) + goto _out71; +case 71: + switch( (*p) ) { + case 95: goto tr78; + case 116: goto tr125; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st72: + if ( ++p == pe ) + goto _out72; +case 72: + switch( (*p) ) { + case 95: goto tr78; + case 99: goto tr126; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st73: + if ( ++p == pe ) + goto _out73; +case 73: + switch( (*p) ) { + case 95: goto tr78; + case 101: goto st74; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st74: + if ( ++p == pe ) + goto _out74; +case 74: + switch( (*p) ) { + case 95: goto tr78; + case 116: goto tr128; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st75: + if ( ++p == pe ) + goto _out75; +case 75: + switch( (*p) ) { + case 95: goto tr78; + case 97: goto st76; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 98 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st76: + if ( ++p == pe ) + goto _out76; +case 76: + 
switch( (*p) ) { + case 95: goto tr78; + case 114: goto st77; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st77: + if ( ++p == pe ) + goto _out77; +case 77: + switch( (*p) ) { + case 95: goto tr78; + case 103: goto st78; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +st78: + if ( ++p == pe ) + goto _out78; +case 78: + switch( (*p) ) { + case 95: goto tr78; + case 115: goto tr132; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr78; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr78; + } else + goto tr78; + goto tr89; +tr133: +#line 686 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st79; +tr134: +#line 681 "rlscan.rl" + {tokend = p+1;{ + scan_error() << "unterminated OR literal" << endl; + }{p = ((tokend))-1;}} + goto st79; +tr135: +#line 676 "rlscan.rl" + {tokend = p+1;{ token( RE_Dash, 0, 0 ); }{p = ((tokend))-1;}} + goto st79; +tr137: +#line 679 "rlscan.rl" + {tokend = p+1;{ token( RE_SqClose ); {{p = ((tokend))-1;}{cs = stack[--top]; goto _again;}} }{p = ((tokend))-1;}} + goto st79; +tr138: +#line 673 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, tokstart+1, tokend ); }{p = ((tokend))-1;}} + goto st79; +tr139: +#line 672 "rlscan.rl" + {tokend = p+1;{ updateCol(); }{p = ((tokend))-1;}} + goto st79; +tr140: +#line 664 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\0' ); }{p = ((tokend))-1;}} + goto st79; +tr141: +#line 665 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\a' ); }{p = ((tokend))-1;}} + goto st79; +tr142: +#line 666 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\b' ); }{p = ((tokend))-1;}} + goto st79; +tr143: +#line 670 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\f' ); }{p = 
((tokend))-1;}} + goto st79; +tr144: +#line 668 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\n' ); }{p = ((tokend))-1;}} + goto st79; +tr145: +#line 671 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\r' ); }{p = ((tokend))-1;}} + goto st79; +tr146: +#line 667 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\t' ); }{p = ((tokend))-1;}} + goto st79; +tr147: +#line 669 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\v' ); }{p = ((tokend))-1;}} + goto st79; +st79: +#line 1 "rlscan.rl" + {tokstart = 0;} + if ( ++p == pe ) + goto _out79; +case 79: +#line 1 "rlscan.rl" + {tokstart = p;} +#line 2531 "rlscan.cpp" + switch( (*p) ) { + case 0: goto tr134; + case 45: goto tr135; + case 92: goto st80; + case 93: goto tr137; + } + goto tr133; +st80: + if ( ++p == pe ) + goto _out80; +case 80: + switch( (*p) ) { + case 10: goto tr139; + case 48: goto tr140; + case 97: goto tr141; + case 98: goto tr142; + case 102: goto tr143; + case 110: goto tr144; + case 114: goto tr145; + case 116: goto tr146; + case 118: goto tr147; + } + goto tr138; +tr148: +#line 721 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st81; +tr149: +#line 716 "rlscan.rl" + {tokend = p+1;{ + scan_error() << "unterminated regular expression" << endl; + }{p = ((tokend))-1;}} + goto st81; +tr150: +#line 711 "rlscan.rl" + {tokend = p+1;{ token( RE_Star ); }{p = ((tokend))-1;}} + goto st81; +tr151: +#line 710 "rlscan.rl" + {tokend = p+1;{ token( RE_Dot ); }{p = ((tokend))-1;}} + goto st81; +tr155: +#line 704 "rlscan.rl" + {tokend = p;{ + token( RE_Slash, tokstart, tokend ); + {{p = ((tokend))-1;}{goto st88;}} + }{p = ((tokend))-1;}} + goto st81; +tr156: +#line 704 "rlscan.rl" + {tokend = p+1;{ + token( RE_Slash, tokstart, tokend ); + {{p = ((tokend))-1;}{goto st88;}} + }{p = ((tokend))-1;}} + goto st81; +tr157: +#line 713 "rlscan.rl" + {tokend = p;{ token( RE_SqOpen ); {{p = ((tokend))-1;}{stack[top++] = 81; goto st79;}} }{p = ((tokend))-1;}} + goto st81; +tr158: 
+#line 714 "rlscan.rl" + {tokend = p+1;{ token( RE_SqOpenNeg ); {{p = ((tokend))-1;}{stack[top++] = 81; goto st79;}} }{p = ((tokend))-1;}} + goto st81; +tr159: +#line 701 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, tokstart+1, tokend ); }{p = ((tokend))-1;}} + goto st81; +tr160: +#line 700 "rlscan.rl" + {tokend = p+1;{ updateCol(); }{p = ((tokend))-1;}} + goto st81; +tr161: +#line 692 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\0' ); }{p = ((tokend))-1;}} + goto st81; +tr162: +#line 693 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\a' ); }{p = ((tokend))-1;}} + goto st81; +tr163: +#line 694 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\b' ); }{p = ((tokend))-1;}} + goto st81; +tr164: +#line 698 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\f' ); }{p = ((tokend))-1;}} + goto st81; +tr165: +#line 696 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\n' ); }{p = ((tokend))-1;}} + goto st81; +tr166: +#line 699 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\r' ); }{p = ((tokend))-1;}} + goto st81; +tr167: +#line 695 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\t' ); }{p = ((tokend))-1;}} + goto st81; +tr168: +#line 697 "rlscan.rl" + {tokend = p+1;{ token( RE_Char, '\v' ); }{p = ((tokend))-1;}} + goto st81; +st81: +#line 1 "rlscan.rl" + {tokstart = 0;} + if ( ++p == pe ) + goto _out81; +case 81: +#line 1 "rlscan.rl" + {tokstart = p;} +#line 2643 "rlscan.cpp" + switch( (*p) ) { + case 0: goto tr149; + case 42: goto tr150; + case 46: goto tr151; + case 47: goto st82; + case 91: goto st83; + case 92: goto st84; + } + goto tr148; +st82: + if ( ++p == pe ) + goto _out82; +case 82: + if ( (*p) == 105 ) + goto tr156; + goto tr155; +st83: + if ( ++p == pe ) + goto _out83; +case 83: + if ( (*p) == 94 ) + goto tr158; + goto tr157; +st84: + if ( ++p == pe ) + goto _out84; +case 84: + switch( (*p) ) { + case 10: goto tr160; + case 48: goto tr161; + case 97: goto tr162; + case 98: goto tr163; + case 102: goto tr164; + case 110: goto tr165; + case 114: goto tr166; + case 
116: goto tr167; + case 118: goto tr168; + } + goto tr159; +tr169: +#line 730 "rlscan.rl" + {tokend = p+1;{ + scan_error() << "unterminated write statement" << endl; + }{p = ((tokend))-1;}} + goto st85; +tr172: +#line 728 "rlscan.rl" + {tokend = p+1;{ token( ';' ); {{p = ((tokend))-1;}{goto st88;}} }{p = ((tokend))-1;}} + goto st85; +tr174: +#line 727 "rlscan.rl" + {tokend = p;{ updateCol(); }{p = ((tokend))-1;}} + goto st85; +tr175: +#line 726 "rlscan.rl" + {tokend = p;{ token( TK_Word, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st85; +st85: +#line 1 "rlscan.rl" + {tokstart = 0;} + if ( ++p == pe ) + goto _out85; +case 85: +#line 1 "rlscan.rl" + {tokstart = p;} +#line 2709 "rlscan.cpp" + switch( (*p) ) { + case 0: goto tr169; + case 32: goto st86; + case 59: goto tr172; + case 95: goto st87; + } + if ( (*p) < 65 ) { + if ( 9 <= (*p) && (*p) <= 10 ) + goto st86; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto st87; + } else + goto st87; + goto st0; +st0: + goto _out0; +st86: + if ( ++p == pe ) + goto _out86; +case 86: + if ( (*p) == 32 ) + goto st86; + if ( 9 <= (*p) && (*p) <= 10 ) + goto st86; + goto tr174; +st87: + if ( ++p == pe ) + goto _out87; +case 87: + if ( (*p) == 95 ) + goto st87; + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st87; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto st87; + } else + goto st87; + goto tr175; +tr33: +#line 532 "rlscan.rl" + { + lastnl = p; + column = 0; + line++; + } +#line 790 "rlscan.rl" + {tokend = p+1;{ updateCol(); }{p = ((tokend))-1;}} + goto st88; +tr37: +#line 777 "rlscan.rl" + {{ token( TK_UInt, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st88; +tr39: +#line 890 "rlscan.rl" + {{ token( *tokstart ); }{p = ((tokend))-1;}} + goto st88; +tr40: +#line 858 "rlscan.rl" + {tokend = p+1;{ + updateCol(); + endSection(); + {{p = ((tokend))-1;}{goto st23;}} + }{p = ((tokend))-1;}} + goto st88; +tr176: +#line 890 "rlscan.rl" + {tokend = p+1;{ token( *tokstart 
); }{p = ((tokend))-1;}} + goto st88; +tr177: +#line 886 "rlscan.rl" + {tokend = p+1;{ + scan_error() << "unterminated ragel section" << endl; + }{p = ((tokend))-1;}} + goto st88; +tr179: +#line 532 "rlscan.rl" + { + lastnl = p; + column = 0; + line++; + } +#line 867 "rlscan.rl" + {tokend = p+1;{ + updateCol(); + if ( singleLineSpec ) { + endSection(); + {{p = ((tokend))-1;}{goto st23;}} + } + }{p = ((tokend))-1;}} + goto st88; +tr188: +#line 787 "rlscan.rl" + {tokend = p+1;{ token( RE_Slash ); {{p = ((tokend))-1;}{goto st81;}} }{p = ((tokend))-1;}} + goto st88; +tr208: +#line 875 "rlscan.rl" + {tokend = p+1;{ + if ( lastToken == KW_Export || lastToken == KW_Entry ) + token( '{' ); + else { + token( '{' ); + curly_count = 1; + inlineBlockType = CurlyDelimited; + {{p = ((tokend))-1;}{goto st37;}} + } + }{p = ((tokend))-1;}} + goto st88; +tr211: +#line 864 "rlscan.rl" + {tokend = p;{ updateCol(); }{p = ((tokend))-1;}} + goto st88; +tr212: +#line 782 "rlscan.rl" + {tokend = p;{ token( TK_Literal, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st88; +tr213: +#line 782 "rlscan.rl" + {tokend = p+1;{ token( TK_Literal, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st88; +tr214: +#line 890 "rlscan.rl" + {tokend = p;{ token( *tokstart ); }{p = ((tokend))-1;}} + goto st88; +tr215: +#line 820 "rlscan.rl" + {tokend = p+1;{ token( TK_AllGblError ); }{p = ((tokend))-1;}} + goto st88; +tr216: +#line 804 "rlscan.rl" + {tokend = p+1;{ token( TK_AllFromState ); }{p = ((tokend))-1;}} + goto st88; +tr217: +#line 812 "rlscan.rl" + {tokend = p+1;{ token( TK_AllEOF ); }{p = ((tokend))-1;}} + goto st88; +tr218: +#line 839 "rlscan.rl" + {tokend = p+1;{ token( TK_AllCond ); }{p = ((tokend))-1;}} + goto st88; +tr219: +#line 828 "rlscan.rl" + {tokend = p+1;{ token( TK_AllLocalError ); }{p = ((tokend))-1;}} + goto st88; +tr220: +#line 796 "rlscan.rl" + {tokend = p+1;{ token( TK_AllToState ); }{p = ((tokend))-1;}} + goto st88; +tr221: +#line 821 "rlscan.rl" + {tokend = p+1;{ token( 
TK_FinalGblError ); }{p = ((tokend))-1;}} + goto st88; +tr222: +#line 805 "rlscan.rl" + {tokend = p+1;{ token( TK_FinalFromState ); }{p = ((tokend))-1;}} + goto st88; +tr223: +#line 813 "rlscan.rl" + {tokend = p+1;{ token( TK_FinalEOF ); }{p = ((tokend))-1;}} + goto st88; +tr224: +#line 840 "rlscan.rl" + {tokend = p+1;{ token( TK_LeavingCond ); }{p = ((tokend))-1;}} + goto st88; +tr225: +#line 829 "rlscan.rl" + {tokend = p+1;{ token( TK_FinalLocalError ); }{p = ((tokend))-1;}} + goto st88; +tr226: +#line 797 "rlscan.rl" + {tokend = p+1;{ token( TK_FinalToState ); }{p = ((tokend))-1;}} + goto st88; +tr227: +#line 843 "rlscan.rl" + {tokend = p+1;{ token( TK_StarStar ); }{p = ((tokend))-1;}} + goto st88; +tr228: +#line 844 "rlscan.rl" + {tokend = p+1;{ token( TK_DashDash ); }{p = ((tokend))-1;}} + goto st88; +tr229: +#line 845 "rlscan.rl" + {tokend = p+1;{ token( TK_Arrow ); }{p = ((tokend))-1;}} + goto st88; +tr230: +#line 842 "rlscan.rl" + {tokend = p+1;{ token( TK_DotDot ); }{p = ((tokend))-1;}} + goto st88; +tr231: +#line 777 "rlscan.rl" + {tokend = p;{ token( TK_UInt, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st88; +tr233: +#line 778 "rlscan.rl" + {tokend = p;{ token( TK_Hex, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st88; +tr234: +#line 856 "rlscan.rl" + {tokend = p+1;{ token( TK_NameSep, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st88; +tr235: +#line 792 "rlscan.rl" + {tokend = p+1;{ token( TK_ColonEquals ); }{p = ((tokend))-1;}} + goto st88; +tr237: +#line 848 "rlscan.rl" + {tokend = p;{ token( TK_ColonGt ); }{p = ((tokend))-1;}} + goto st88; +tr238: +#line 849 "rlscan.rl" + {tokend = p+1;{ token( TK_ColonGtGt ); }{p = ((tokend))-1;}} + goto st88; +tr239: +#line 822 "rlscan.rl" + {tokend = p+1;{ token( TK_NotStartGblError ); }{p = ((tokend))-1;}} + goto st88; +tr240: +#line 806 "rlscan.rl" + {tokend = p+1;{ token( TK_NotStartFromState ); }{p = ((tokend))-1;}} + goto st88; +tr241: +#line 814 "rlscan.rl" + {tokend = p+1;{ token( 
TK_NotStartEOF ); }{p = ((tokend))-1;}} + goto st88; +tr242: +#line 850 "rlscan.rl" + {tokend = p+1;{ token( TK_LtColon ); }{p = ((tokend))-1;}} + goto st88; +tr244: +#line 830 "rlscan.rl" + {tokend = p+1;{ token( TK_NotStartLocalError ); }{p = ((tokend))-1;}} + goto st88; +tr245: +#line 798 "rlscan.rl" + {tokend = p+1;{ token( TK_NotStartToState ); }{p = ((tokend))-1;}} + goto st88; +tr246: +#line 835 "rlscan.rl" + {tokend = p;{ token( TK_Middle ); }{p = ((tokend))-1;}} + goto st88; +tr247: +#line 824 "rlscan.rl" + {tokend = p+1;{ token( TK_MiddleGblError ); }{p = ((tokend))-1;}} + goto st88; +tr248: +#line 808 "rlscan.rl" + {tokend = p+1;{ token( TK_MiddleFromState ); }{p = ((tokend))-1;}} + goto st88; +tr249: +#line 816 "rlscan.rl" + {tokend = p+1;{ token( TK_MiddleEOF ); }{p = ((tokend))-1;}} + goto st88; +tr250: +#line 832 "rlscan.rl" + {tokend = p+1;{ token( TK_MiddleLocalError ); }{p = ((tokend))-1;}} + goto st88; +tr251: +#line 800 "rlscan.rl" + {tokend = p+1;{ token( TK_MiddleToState ); }{p = ((tokend))-1;}} + goto st88; +tr252: +#line 846 "rlscan.rl" + {tokend = p+1;{ token( TK_DoubleArrow ); }{p = ((tokend))-1;}} + goto st88; +tr253: +#line 819 "rlscan.rl" + {tokend = p+1;{ token( TK_StartGblError ); }{p = ((tokend))-1;}} + goto st88; +tr254: +#line 803 "rlscan.rl" + {tokend = p+1;{ token( TK_StartFromState ); }{p = ((tokend))-1;}} + goto st88; +tr255: +#line 811 "rlscan.rl" + {tokend = p+1;{ token( TK_StartEOF ); }{p = ((tokend))-1;}} + goto st88; +tr256: +#line 838 "rlscan.rl" + {tokend = p+1;{ token( TK_StartCond ); }{p = ((tokend))-1;}} + goto st88; +tr257: +#line 827 "rlscan.rl" + {tokend = p+1;{ token( TK_StartLocalError ); }{p = ((tokend))-1;}} + goto st88; +tr258: +#line 795 "rlscan.rl" + {tokend = p+1;{ token( TK_StartToState ); }{p = ((tokend))-1;}} + goto st88; +tr259: +#line 823 "rlscan.rl" + {tokend = p+1;{ token( TK_NotFinalGblError ); }{p = ((tokend))-1;}} + goto st88; +tr260: +#line 807 "rlscan.rl" + {tokend = p+1;{ token( 
TK_NotFinalFromState ); }{p = ((tokend))-1;}} + goto st88; +tr261: +#line 815 "rlscan.rl" + {tokend = p+1;{ token( TK_NotFinalEOF ); }{p = ((tokend))-1;}} + goto st88; +tr262: +#line 831 "rlscan.rl" + {tokend = p+1;{ token( TK_NotFinalLocalError ); }{p = ((tokend))-1;}} + goto st88; +tr263: +#line 799 "rlscan.rl" + {tokend = p+1;{ token( TK_NotFinalToState ); }{p = ((tokend))-1;}} + goto st88; +tr264: +#line 1 "rlscan.rl" + { switch( act ) { + case 62: + { token( KW_Machine ); } + break; + case 63: + { token( KW_Include ); } + break; + case 64: + { token( KW_Import ); } + break; + case 65: + { + token( KW_Write ); + {{p = ((tokend))-1;}{goto st85;}} + } + break; + case 66: + { token( KW_Action ); } + break; + case 67: + { token( KW_AlphType ); } + break; + case 68: + { + token( KW_GetKey ); + inlineBlockType = SemiTerminated; + {{p = ((tokend))-1;}{goto st37;}} + } + break; + case 69: + { + token( KW_Access ); + inlineBlockType = SemiTerminated; + {{p = ((tokend))-1;}{goto st37;}} + } + break; + case 70: + { + token( KW_Variable ); + inlineBlockType = SemiTerminated; + {{p = ((tokend))-1;}{goto st37;}} + } + break; + case 71: + { token( KW_When ); } + break; + case 72: + { token( KW_Eof ); } + break; + case 73: + { token( KW_Err ); } + break; + case 74: + { token( KW_Lerr ); } + break; + case 75: + { token( KW_To ); } + break; + case 76: + { token( KW_From ); } + break; + case 77: + { token( KW_Export ); } + break; + case 78: + { token( TK_Word, tokstart, tokend ); } + break; + default: break; + } + {p = ((tokend))-1;}} + goto st88; +tr265: +#line 784 "rlscan.rl" + {tokend = p;{ token( RE_SqOpen ); {{p = ((tokend))-1;}{stack[top++] = 88; goto st79;}} }{p = ((tokend))-1;}} + goto st88; +tr266: +#line 785 "rlscan.rl" + {tokend = p+1;{ token( RE_SqOpenNeg ); {{p = ((tokend))-1;}{stack[top++] = 88; goto st79;}} }{p = ((tokend))-1;}} + goto st88; +tr267: +#line 774 "rlscan.rl" + {tokend = p;{ token( TK_Word, tokstart, tokend ); }{p = ((tokend))-1;}} + goto st88; +tr336: 
+#line 853 "rlscan.rl" + {tokend = p+1;{ token( TK_BarStar ); }{p = ((tokend))-1;}} + goto st88; +st88: +#line 1 "rlscan.rl" + {tokstart = 0;} + if ( ++p == pe ) + goto _out88; +case 88: +#line 1 "rlscan.rl" + {tokstart = p;} +#line 3117 "rlscan.cpp" + switch( (*p) ) { + case 0: goto tr177; + case 9: goto st89; + case 10: goto tr179; + case 13: goto st89; + case 32: goto st89; + case 34: goto tr180; + case 35: goto tr181; + case 36: goto st93; + case 37: goto st94; + case 39: goto tr184; + case 42: goto st96; + case 45: goto st97; + case 46: goto st98; + case 47: goto tr188; + case 48: goto tr189; + case 58: goto st102; + case 60: goto st104; + case 61: goto st106; + case 62: goto st107; + case 64: goto st108; + case 91: goto st110; + case 95: goto tr196; + case 97: goto st111; + case 101: goto st125; + case 102: goto st132; + case 103: goto st135; + case 105: goto st140; + case 108: goto st150; + case 109: goto st153; + case 116: goto st159; + case 118: goto st160; + case 119: goto st167; + case 123: goto tr208; + case 124: goto st173; + case 125: goto tr210; + } + if ( (*p) < 65 ) { + if ( 49 <= (*p) && (*p) <= 57 ) + goto st100; + } else if ( (*p) > 90 ) { + if ( 98 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr176; +st89: + if ( ++p == pe ) + goto _out89; +case 89: + switch( (*p) ) { + case 9: goto st89; + case 13: goto st89; + case 32: goto st89; + } + goto tr211; +tr180: +#line 1 "rlscan.rl" + {tokend = p+1;} + goto st90; +st90: + if ( ++p == pe ) + goto _out90; +case 90: +#line 3182 "rlscan.cpp" + switch( (*p) ) { + case 10: goto tr29; + case 34: goto st91; + case 92: goto st17; + } + goto st16; +tr29: +#line 532 "rlscan.rl" + { + lastnl = p; + column = 0; + line++; + } + goto st16; +st16: + if ( ++p == pe ) + goto _out16; +case 16: +#line 3201 "rlscan.cpp" + switch( (*p) ) { + case 10: goto tr29; + case 34: goto st91; + case 92: goto st17; + } + goto st16; +st91: + if ( ++p == pe ) + goto _out91; +case 91: + if ( (*p) == 105 ) + 
goto tr213; + goto tr212; +st17: + if ( ++p == pe ) + goto _out17; +case 17: + if ( (*p) == 10 ) + goto tr29; + goto st16; +tr181: +#line 1 "rlscan.rl" + {tokend = p+1;} + goto st92; +st92: + if ( ++p == pe ) + goto _out92; +case 92: +#line 3230 "rlscan.cpp" + if ( (*p) == 10 ) + goto tr33; + goto st18; +st18: + if ( ++p == pe ) + goto _out18; +case 18: + if ( (*p) == 10 ) + goto tr33; + goto st18; +st93: + if ( ++p == pe ) + goto _out93; +case 93: + switch( (*p) ) { + case 33: goto tr215; + case 42: goto tr216; + case 47: goto tr217; + case 63: goto tr218; + case 94: goto tr219; + case 126: goto tr220; + } + goto tr214; +st94: + if ( ++p == pe ) + goto _out94; +case 94: + switch( (*p) ) { + case 33: goto tr221; + case 42: goto tr222; + case 47: goto tr223; + case 63: goto tr224; + case 94: goto tr225; + case 126: goto tr226; + } + goto tr214; +tr184: +#line 1 "rlscan.rl" + {tokend = p+1;} + goto st95; +st95: + if ( ++p == pe ) + goto _out95; +case 95: +#line 3275 "rlscan.cpp" + switch( (*p) ) { + case 10: goto tr35; + case 39: goto st91; + case 92: goto st20; + } + goto st19; +tr35: +#line 532 "rlscan.rl" + { + lastnl = p; + column = 0; + line++; + } + goto st19; +st19: + if ( ++p == pe ) + goto _out19; +case 19: +#line 3294 "rlscan.cpp" + switch( (*p) ) { + case 10: goto tr35; + case 39: goto st91; + case 92: goto st20; + } + goto st19; +st20: + if ( ++p == pe ) + goto _out20; +case 20: + if ( (*p) == 10 ) + goto tr35; + goto st19; +st96: + if ( ++p == pe ) + goto _out96; +case 96: + if ( (*p) == 42 ) + goto tr227; + goto tr214; +st97: + if ( ++p == pe ) + goto _out97; +case 97: + switch( (*p) ) { + case 45: goto tr228; + case 62: goto tr229; + } + goto tr214; +st98: + if ( ++p == pe ) + goto _out98; +case 98: + if ( (*p) == 46 ) + goto tr230; + goto tr214; +tr189: +#line 1 "rlscan.rl" + {tokend = p+1;} + goto st99; +st99: + if ( ++p == pe ) + goto _out99; +case 99: +#line 3339 "rlscan.cpp" + if ( (*p) == 120 ) + goto st21; + if ( 48 <= (*p) && (*p) <= 57 ) + 
goto st100; + goto tr231; +st100: + if ( ++p == pe ) + goto _out100; +case 100: + if ( 48 <= (*p) && (*p) <= 57 ) + goto st100; + goto tr231; +st21: + if ( ++p == pe ) + goto _out21; +case 21: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st101; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st101; + } else + goto st101; + goto tr37; +st101: + if ( ++p == pe ) + goto _out101; +case 101: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st101; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st101; + } else + goto st101; + goto tr233; +st102: + if ( ++p == pe ) + goto _out102; +case 102: + switch( (*p) ) { + case 58: goto tr234; + case 61: goto tr235; + case 62: goto st103; + } + goto tr214; +st103: + if ( ++p == pe ) + goto _out103; +case 103: + if ( (*p) == 62 ) + goto tr238; + goto tr237; +st104: + if ( ++p == pe ) + goto _out104; +case 104: + switch( (*p) ) { + case 33: goto tr239; + case 42: goto tr240; + case 47: goto tr241; + case 58: goto tr242; + case 62: goto st105; + case 94: goto tr244; + case 126: goto tr245; + } + goto tr214; +st105: + if ( ++p == pe ) + goto _out105; +case 105: + switch( (*p) ) { + case 33: goto tr247; + case 42: goto tr248; + case 47: goto tr249; + case 94: goto tr250; + case 126: goto tr251; + } + goto tr246; +st106: + if ( ++p == pe ) + goto _out106; +case 106: + if ( (*p) == 62 ) + goto tr252; + goto tr214; +st107: + if ( ++p == pe ) + goto _out107; +case 107: + switch( (*p) ) { + case 33: goto tr253; + case 42: goto tr254; + case 47: goto tr255; + case 63: goto tr256; + case 94: goto tr257; + case 126: goto tr258; + } + goto tr214; +st108: + if ( ++p == pe ) + goto _out108; +case 108: + switch( (*p) ) { + case 33: goto tr259; + case 42: goto tr260; + case 47: goto tr261; + case 94: goto tr262; + case 126: goto tr263; + } + goto tr214; +tr196: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 774 "rlscan.rl" + {act = 78;} + goto st109; +tr274: +#line 1 "rlscan.rl" 
+ {tokend = p+1;} +#line 755 "rlscan.rl" + {act = 69;} + goto st109; +tr277: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 744 "rlscan.rl" + {act = 66;} + goto st109; +tr283: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 745 "rlscan.rl" + {act = 67;} + goto st109; +tr287: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 766 "rlscan.rl" + {act = 72;} + goto st109; +tr288: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 767 "rlscan.rl" + {act = 73;} + goto st109; +tr292: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 771 "rlscan.rl" + {act = 77;} + goto st109; +tr295: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 770 "rlscan.rl" + {act = 76;} + goto st109; +tr300: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 750 "rlscan.rl" + {act = 68;} + goto st109; +tr306: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 739 "rlscan.rl" + {act = 64;} + goto st109; +tr311: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 738 "rlscan.rl" + {act = 63;} + goto st109; +tr314: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 768 "rlscan.rl" + {act = 74;} + goto st109; +tr320: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 737 "rlscan.rl" + {act = 62;} + goto st109; +tr321: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 769 "rlscan.rl" + {act = 75;} + goto st109; +tr328: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 760 "rlscan.rl" + {act = 70;} + goto st109; +tr332: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 765 "rlscan.rl" + {act = 71;} + goto st109; +tr335: +#line 1 "rlscan.rl" + {tokend = p+1;} +#line 740 "rlscan.rl" + {act = 65;} + goto st109; +st109: + if ( ++p == pe ) + goto _out109; +case 109: +#line 3559 "rlscan.cpp" + if ( (*p) == 95 ) + goto tr196; + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr264; +st110: + if ( ++p == pe ) + goto _out110; +case 110: + if ( (*p) == 94 ) + goto tr266; + goto tr265; +st111: + if ( ++p == pe ) + goto _out111; +case 111: + 
switch( (*p) ) { + case 95: goto tr196; + case 99: goto st112; + case 108: goto st119; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st112: + if ( ++p == pe ) + goto _out112; +case 112: + switch( (*p) ) { + case 95: goto tr196; + case 99: goto st113; + case 116: goto st116; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st113: + if ( ++p == pe ) + goto _out113; +case 113: + switch( (*p) ) { + case 95: goto tr196; + case 101: goto st114; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st114: + if ( ++p == pe ) + goto _out114; +case 114: + switch( (*p) ) { + case 95: goto tr196; + case 115: goto st115; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st115: + if ( ++p == pe ) + goto _out115; +case 115: + switch( (*p) ) { + case 95: goto tr196; + case 115: goto tr274; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st116: + if ( ++p == pe ) + goto _out116; +case 116: + switch( (*p) ) { + case 95: goto tr196; + case 105: goto st117; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st117: + if ( ++p == pe ) + goto _out117; +case 117: + switch( (*p) ) { + case 95: goto tr196; + case 111: goto st118; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && 
(*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st118: + if ( ++p == pe ) + goto _out118; +case 118: + switch( (*p) ) { + case 95: goto tr196; + case 110: goto tr277; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st119: + if ( ++p == pe ) + goto _out119; +case 119: + switch( (*p) ) { + case 95: goto tr196; + case 112: goto st120; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st120: + if ( ++p == pe ) + goto _out120; +case 120: + switch( (*p) ) { + case 95: goto tr196; + case 104: goto st121; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st121: + if ( ++p == pe ) + goto _out121; +case 121: + switch( (*p) ) { + case 95: goto tr196; + case 116: goto st122; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st122: + if ( ++p == pe ) + goto _out122; +case 122: + switch( (*p) ) { + case 95: goto tr196; + case 121: goto st123; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st123: + if ( ++p == pe ) + goto _out123; +case 123: + switch( (*p) ) { + case 95: goto tr196; + case 112: goto st124; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st124: + if ( 
++p == pe ) + goto _out124; +case 124: + switch( (*p) ) { + case 95: goto tr196; + case 101: goto tr283; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st125: + if ( ++p == pe ) + goto _out125; +case 125: + switch( (*p) ) { + case 95: goto tr196; + case 111: goto st126; + case 114: goto st127; + case 120: goto st128; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st126: + if ( ++p == pe ) + goto _out126; +case 126: + switch( (*p) ) { + case 95: goto tr196; + case 102: goto tr287; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st127: + if ( ++p == pe ) + goto _out127; +case 127: + switch( (*p) ) { + case 95: goto tr196; + case 114: goto tr288; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st128: + if ( ++p == pe ) + goto _out128; +case 128: + switch( (*p) ) { + case 95: goto tr196; + case 112: goto st129; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st129: + if ( ++p == pe ) + goto _out129; +case 129: + switch( (*p) ) { + case 95: goto tr196; + case 111: goto st130; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st130: + if ( ++p == pe ) + goto _out130; +case 130: + switch( (*p) ) { + case 95: goto tr196; + case 114: goto st131; + } 
+ if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st131: + if ( ++p == pe ) + goto _out131; +case 131: + switch( (*p) ) { + case 95: goto tr196; + case 116: goto tr292; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st132: + if ( ++p == pe ) + goto _out132; +case 132: + switch( (*p) ) { + case 95: goto tr196; + case 114: goto st133; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st133: + if ( ++p == pe ) + goto _out133; +case 133: + switch( (*p) ) { + case 95: goto tr196; + case 111: goto st134; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st134: + if ( ++p == pe ) + goto _out134; +case 134: + switch( (*p) ) { + case 95: goto tr196; + case 109: goto tr295; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st135: + if ( ++p == pe ) + goto _out135; +case 135: + switch( (*p) ) { + case 95: goto tr196; + case 101: goto st136; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st136: + if ( ++p == pe ) + goto _out136; +case 136: + switch( (*p) ) { + case 95: goto tr196; + case 116: goto st137; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + 
goto tr196; + goto tr267; +st137: + if ( ++p == pe ) + goto _out137; +case 137: + switch( (*p) ) { + case 95: goto tr196; + case 107: goto st138; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st138: + if ( ++p == pe ) + goto _out138; +case 138: + switch( (*p) ) { + case 95: goto tr196; + case 101: goto st139; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st139: + if ( ++p == pe ) + goto _out139; +case 139: + switch( (*p) ) { + case 95: goto tr196; + case 121: goto tr300; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st140: + if ( ++p == pe ) + goto _out140; +case 140: + switch( (*p) ) { + case 95: goto tr196; + case 109: goto st141; + case 110: goto st145; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st141: + if ( ++p == pe ) + goto _out141; +case 141: + switch( (*p) ) { + case 95: goto tr196; + case 112: goto st142; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st142: + if ( ++p == pe ) + goto _out142; +case 142: + switch( (*p) ) { + case 95: goto tr196; + case 111: goto st143; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st143: + if ( ++p == pe ) + goto _out143; +case 143: + switch( (*p) ) { + case 95: goto tr196; + case 114: 
goto st144; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st144: + if ( ++p == pe ) + goto _out144; +case 144: + switch( (*p) ) { + case 95: goto tr196; + case 116: goto tr306; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st145: + if ( ++p == pe ) + goto _out145; +case 145: + switch( (*p) ) { + case 95: goto tr196; + case 99: goto st146; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st146: + if ( ++p == pe ) + goto _out146; +case 146: + switch( (*p) ) { + case 95: goto tr196; + case 108: goto st147; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st147: + if ( ++p == pe ) + goto _out147; +case 147: + switch( (*p) ) { + case 95: goto tr196; + case 117: goto st148; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st148: + if ( ++p == pe ) + goto _out148; +case 148: + switch( (*p) ) { + case 95: goto tr196; + case 100: goto st149; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st149: + if ( ++p == pe ) + goto _out149; +case 149: + switch( (*p) ) { + case 95: goto tr196; + case 101: goto tr311; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto 
tr196; + } else + goto tr196; + goto tr267; +st150: + if ( ++p == pe ) + goto _out150; +case 150: + switch( (*p) ) { + case 95: goto tr196; + case 101: goto st151; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st151: + if ( ++p == pe ) + goto _out151; +case 151: + switch( (*p) ) { + case 95: goto tr196; + case 114: goto st152; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st152: + if ( ++p == pe ) + goto _out152; +case 152: + switch( (*p) ) { + case 95: goto tr196; + case 114: goto tr314; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st153: + if ( ++p == pe ) + goto _out153; +case 153: + switch( (*p) ) { + case 95: goto tr196; + case 97: goto st154; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 98 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st154: + if ( ++p == pe ) + goto _out154; +case 154: + switch( (*p) ) { + case 95: goto tr196; + case 99: goto st155; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st155: + if ( ++p == pe ) + goto _out155; +case 155: + switch( (*p) ) { + case 95: goto tr196; + case 104: goto st156; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st156: + if ( ++p == pe ) + goto _out156; +case 156: + switch( (*p) ) { + case 95: goto tr196; + case 105: goto 
st157; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st157: + if ( ++p == pe ) + goto _out157; +case 157: + switch( (*p) ) { + case 95: goto tr196; + case 110: goto st158; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st158: + if ( ++p == pe ) + goto _out158; +case 158: + switch( (*p) ) { + case 95: goto tr196; + case 101: goto tr320; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st159: + if ( ++p == pe ) + goto _out159; +case 159: + switch( (*p) ) { + case 95: goto tr196; + case 111: goto tr321; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st160: + if ( ++p == pe ) + goto _out160; +case 160: + switch( (*p) ) { + case 95: goto tr196; + case 97: goto st161; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 98 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st161: + if ( ++p == pe ) + goto _out161; +case 161: + switch( (*p) ) { + case 95: goto tr196; + case 114: goto st162; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st162: + if ( ++p == pe ) + goto _out162; +case 162: + switch( (*p) ) { + case 95: goto tr196; + case 105: goto st163; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + 
} else + goto tr196; + goto tr267; +st163: + if ( ++p == pe ) + goto _out163; +case 163: + switch( (*p) ) { + case 95: goto tr196; + case 97: goto st164; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 98 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st164: + if ( ++p == pe ) + goto _out164; +case 164: + switch( (*p) ) { + case 95: goto tr196; + case 98: goto st165; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st165: + if ( ++p == pe ) + goto _out165; +case 165: + switch( (*p) ) { + case 95: goto tr196; + case 108: goto st166; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st166: + if ( ++p == pe ) + goto _out166; +case 166: + switch( (*p) ) { + case 95: goto tr196; + case 101: goto tr328; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st167: + if ( ++p == pe ) + goto _out167; +case 167: + switch( (*p) ) { + case 95: goto tr196; + case 104: goto st168; + case 114: goto st170; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st168: + if ( ++p == pe ) + goto _out168; +case 168: + switch( (*p) ) { + case 95: goto tr196; + case 101: goto st169; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st169: + if ( ++p == pe ) + goto _out169; +case 169: + switch( (*p) ) { + case 95: goto tr196; + 
case 110: goto tr332; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st170: + if ( ++p == pe ) + goto _out170; +case 170: + switch( (*p) ) { + case 95: goto tr196; + case 105: goto st171; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st171: + if ( ++p == pe ) + goto _out171; +case 171: + switch( (*p) ) { + case 95: goto tr196; + case 116: goto st172; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st172: + if ( ++p == pe ) + goto _out172; +case 172: + switch( (*p) ) { + case 95: goto tr196; + case 101: goto tr335; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr196; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr196; + } else + goto tr196; + goto tr267; +st173: + if ( ++p == pe ) + goto _out173; +case 173: + if ( (*p) == 42 ) + goto tr336; + goto tr214; +tr210: +#line 1 "rlscan.rl" + {tokend = p+1;} + goto st174; +st174: + if ( ++p == pe ) + goto _out174; +case 174: +#line 4653 "rlscan.cpp" + if ( (*p) == 37 ) + goto st22; + goto tr214; +st22: + if ( ++p == pe ) + goto _out22; +case 22: + if ( (*p) == 37 ) + goto tr40; + goto tr39; + } + _out23: cs = 23; goto _out; + _out24: cs = 24; goto _out; + _out25: cs = 25; goto _out; + _out1: cs = 1; goto _out; + _out2: cs = 2; goto _out; + _out26: cs = 26; goto _out; + _out27: cs = 27; goto _out; + _out28: cs = 28; goto _out; + _out3: cs = 3; goto _out; + _out4: cs = 4; goto _out; + _out29: cs = 29; goto _out; + _out5: cs = 5; goto _out; + _out6: cs = 6; goto _out; + _out7: cs = 7; goto _out; + _out30: cs = 30; goto _out; + _out31: cs = 31; goto _out; + _out32: cs 
= 32; goto _out; + _out33: cs = 33; goto _out; + _out34: cs = 34; goto _out; + _out35: cs = 35; goto _out; + _out36: cs = 36; goto _out; + _out37: cs = 37; goto _out; + _out38: cs = 38; goto _out; + _out39: cs = 39; goto _out; + _out8: cs = 8; goto _out; + _out9: cs = 9; goto _out; + _out40: cs = 40; goto _out; + _out10: cs = 10; goto _out; + _out11: cs = 11; goto _out; + _out41: cs = 41; goto _out; + _out12: cs = 12; goto _out; + _out13: cs = 13; goto _out; + _out14: cs = 14; goto _out; + _out42: cs = 42; goto _out; + _out43: cs = 43; goto _out; + _out15: cs = 15; goto _out; + _out44: cs = 44; goto _out; + _out45: cs = 45; goto _out; + _out46: cs = 46; goto _out; + _out47: cs = 47; goto _out; + _out48: cs = 48; goto _out; + _out49: cs = 49; goto _out; + _out50: cs = 50; goto _out; + _out51: cs = 51; goto _out; + _out52: cs = 52; goto _out; + _out53: cs = 53; goto _out; + _out54: cs = 54; goto _out; + _out55: cs = 55; goto _out; + _out56: cs = 56; goto _out; + _out57: cs = 57; goto _out; + _out58: cs = 58; goto _out; + _out59: cs = 59; goto _out; + _out60: cs = 60; goto _out; + _out61: cs = 61; goto _out; + _out62: cs = 62; goto _out; + _out63: cs = 63; goto _out; + _out64: cs = 64; goto _out; + _out65: cs = 65; goto _out; + _out66: cs = 66; goto _out; + _out67: cs = 67; goto _out; + _out68: cs = 68; goto _out; + _out69: cs = 69; goto _out; + _out70: cs = 70; goto _out; + _out71: cs = 71; goto _out; + _out72: cs = 72; goto _out; + _out73: cs = 73; goto _out; + _out74: cs = 74; goto _out; + _out75: cs = 75; goto _out; + _out76: cs = 76; goto _out; + _out77: cs = 77; goto _out; + _out78: cs = 78; goto _out; + _out79: cs = 79; goto _out; + _out80: cs = 80; goto _out; + _out81: cs = 81; goto _out; + _out82: cs = 82; goto _out; + _out83: cs = 83; goto _out; + _out84: cs = 84; goto _out; + _out85: cs = 85; goto _out; + _out0: cs = 0; goto _out; + _out86: cs = 86; goto _out; + _out87: cs = 87; goto _out; + _out88: cs = 88; goto _out; + _out89: cs = 89; goto _out; + 
_out90: cs = 90; goto _out; + _out16: cs = 16; goto _out; + _out91: cs = 91; goto _out; + _out17: cs = 17; goto _out; + _out92: cs = 92; goto _out; + _out18: cs = 18; goto _out; + _out93: cs = 93; goto _out; + _out94: cs = 94; goto _out; + _out95: cs = 95; goto _out; + _out19: cs = 19; goto _out; + _out20: cs = 20; goto _out; + _out96: cs = 96; goto _out; + _out97: cs = 97; goto _out; + _out98: cs = 98; goto _out; + _out99: cs = 99; goto _out; + _out100: cs = 100; goto _out; + _out21: cs = 21; goto _out; + _out101: cs = 101; goto _out; + _out102: cs = 102; goto _out; + _out103: cs = 103; goto _out; + _out104: cs = 104; goto _out; + _out105: cs = 105; goto _out; + _out106: cs = 106; goto _out; + _out107: cs = 107; goto _out; + _out108: cs = 108; goto _out; + _out109: cs = 109; goto _out; + _out110: cs = 110; goto _out; + _out111: cs = 111; goto _out; + _out112: cs = 112; goto _out; + _out113: cs = 113; goto _out; + _out114: cs = 114; goto _out; + _out115: cs = 115; goto _out; + _out116: cs = 116; goto _out; + _out117: cs = 117; goto _out; + _out118: cs = 118; goto _out; + _out119: cs = 119; goto _out; + _out120: cs = 120; goto _out; + _out121: cs = 121; goto _out; + _out122: cs = 122; goto _out; + _out123: cs = 123; goto _out; + _out124: cs = 124; goto _out; + _out125: cs = 125; goto _out; + _out126: cs = 126; goto _out; + _out127: cs = 127; goto _out; + _out128: cs = 128; goto _out; + _out129: cs = 129; goto _out; + _out130: cs = 130; goto _out; + _out131: cs = 131; goto _out; + _out132: cs = 132; goto _out; + _out133: cs = 133; goto _out; + _out134: cs = 134; goto _out; + _out135: cs = 135; goto _out; + _out136: cs = 136; goto _out; + _out137: cs = 137; goto _out; + _out138: cs = 138; goto _out; + _out139: cs = 139; goto _out; + _out140: cs = 140; goto _out; + _out141: cs = 141; goto _out; + _out142: cs = 142; goto _out; + _out143: cs = 143; goto _out; + _out144: cs = 144; goto _out; + _out145: cs = 145; goto _out; + _out146: cs = 146; goto _out; + _out147: cs = 
147; goto _out; + _out148: cs = 148; goto _out; + _out149: cs = 149; goto _out; + _out150: cs = 150; goto _out; + _out151: cs = 151; goto _out; + _out152: cs = 152; goto _out; + _out153: cs = 153; goto _out; + _out154: cs = 154; goto _out; + _out155: cs = 155; goto _out; + _out156: cs = 156; goto _out; + _out157: cs = 157; goto _out; + _out158: cs = 158; goto _out; + _out159: cs = 159; goto _out; + _out160: cs = 160; goto _out; + _out161: cs = 161; goto _out; + _out162: cs = 162; goto _out; + _out163: cs = 163; goto _out; + _out164: cs = 164; goto _out; + _out165: cs = 165; goto _out; + _out166: cs = 166; goto _out; + _out167: cs = 167; goto _out; + _out168: cs = 168; goto _out; + _out169: cs = 169; goto _out; + _out170: cs = 170; goto _out; + _out171: cs = 171; goto _out; + _out172: cs = 172; goto _out; + _out173: cs = 173; goto _out; + _out174: cs = 174; goto _out; + _out22: cs = 22; goto _out; + + _out: {} + } +#line 972 "rlscan.rl" + + /* Check if we failed. */ + if ( cs == rlscan_error ) { + /* Machine failed before finding a token. I'm not yet sure if this + * is reachable. */ + scan_error() << "scanner error" << endl; + exit(1); + } + + /* Decide if we need to preserve anything. */ + char *preserve = tokstart; + + /* Now set up the prefix. */ + if ( preserve == 0 ) + have = 0; + else { + /* There is data that needs to be shifted over. */ + have = pe - preserve; + memmove( buf, preserve, have ); + unsigned int shiftback = preserve - buf; + if ( tokstart != 0 ) + tokstart -= shiftback; + tokend -= shiftback; + + preserve = buf; + } + } + + delete[] buf; +} + +void scan( char *fileName, istream &input, ostream &output ) +{ +} diff --git a/contrib/tools/ragel5/ragel/rlscan.h b/contrib/tools/ragel5/ragel/rlscan.h new file mode 100644 index 0000000000..e6302aa4c9 --- /dev/null +++ b/contrib/tools/ragel5/ragel/rlscan.h @@ -0,0 +1,161 @@ +/* + * Copyright 2007 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. 
+ * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _RLSCAN_H +#define _RLSCAN_H + +#include <iostream> +#include "rlscan.h" +#include "vector.h" +#include "rlparse.h" +#include "parsedata.h" +#include "avltree.h" +#include "vector.h" + +using std::istream; +using std::ostream; + +extern char *Parser_lelNames[]; + +/* This is used for tracking the current stack of include file/machine pairs. It is + * is used to detect and recursive include structure. 
*/
+struct IncludeStackItem
+{
+    IncludeStackItem(const char *fileName, char *sectionName )
+        : fileName(fileName), sectionName(sectionName) {}
+
+    const char *fileName;
+    char *sectionName;
+};
+
+typedef Vector<IncludeStackItem> IncludeStack;
+
+/* Builds the path of `rel` relative to the directory that contains `abs`.
+ *
+ * The result is everything in `abs` up to and including its last '/',
+ * followed by `rel`. When `abs` contains no '/' the directory part is empty
+ * and the result is a plain copy of `rel`.
+ *
+ * Returns a NUL-terminated buffer allocated with new[]; the caller owns it
+ * and must release it with delete[]. */
+inline char* resolvePath(const char* rel, const char* abs) {
+    const size_t relLen = strlen(rel);
+
+    /* strrchr yields a null pointer when there is no '/'; treat that as
+     * "no directory part" instead of doing arithmetic on the null pointer. */
+    const char* lastSlash = strrchr(abs, '/');
+    const size_t dirLen = lastSlash != 0 ? (size_t)(lastSlash - abs) + 1 : 0;
+
+    char* ret = new char[dirLen + relLen + 1];
+    memcpy(ret, abs, dirLen);
+    /* relLen + 1 also copies the terminating NUL of rel. */
+    memcpy(ret + dirLen, rel, relLen + 1);
+
+    return ret;
+}
+
+/* Per-input scanner state: the streams, the include context, the buffered
+ * import tokens and the position tracking used while scanning. */
+struct Scanner
+{
+    Scanner(const char *fileName, istream &input, ostream &output,
+            Parser *inclToParser, char *inclSectionTarg,
+            int includeDepth, bool importMachines )
+    :
+        fileName(fileName), input(input), output(output),
+        inclToParser(inclToParser),
+        inclSectionTarg(inclSectionTarg),
+        includeDepth(includeDepth),
+        importMachines(importMachines),
+        cur_token(0),
+        line(1), column(1), lastnl(0),
+        parser(0), ignoreSection(false),
+        parserExistsError(false),
+        whitespaceOn(true),
+        lastToken(0)
+    {}
+
+    bool recursiveInclude(const char *inclFileName, char *inclSectionName );
+
+    /* Turns an include file name token into a path relative to the
+     * directory of the file being scanned.
+     *
+     * When the token starts with a double quote, the character at
+     * len - 1 is overwritten with NUL and the leading quote is skipped,
+     * so the caller's buffer is modified in place.
+     *
+     * The returned buffer comes from resolvePath() (new[]); nothing in
+     * this header releases it, so the caller is responsible for it. */
+    char *prepareFileName( char *inclFileName, int len )
+    {
+        if (*inclFileName == '\"') {
+            inclFileName[len - 1] = 0;
+            ++inclFileName;
+        }
+        /* The result is never freed here; the original code leaked it too. */
+        char* res = resolvePath(inclFileName, fileName);
+        return res;
+    }
+
+    void init();
+    void token( int type, char *start, char *end );
+    void token( int type, char c );
+    void token( int type );
+    void processToken( int type, char *tokdata, int toklen );
+    void directToParser( Parser *toParser, const char *tokFileName, int tokLine,
+            int tokColumn, int type, char *tokdata, int toklen );
+    void flushImport( );
+    void importToken( int type, char *start, char *end );
+    void pass( int token, char *start, char *end );
+    void pass();
+    void updateCol();
+    void startSection();
+    void endSection();
+    void do_scan();
+    bool active();
+    ostream &scan_error();
+
+    const char *fileName;
+    istream &input;
+    ostream &output;
+    Parser *inclToParser;
+    char *inclSectionTarg;
+    int includeDepth;
+    bool importMachines;
+
+    /* For import parsing. */
+    int tok_cs, tok_act;
+    int *tok_tokstart, *tok_tokend;
+    int cur_token;
+    static const int max_tokens = 32;
+    int token_data[max_tokens];
+    char *token_strings[max_tokens];
+    int token_lens[max_tokens];
+
+    /* For section processing. */
+    int cs;
+    char *word, *lit;
+    int word_len, lit_len;
+
+    /* For character scanning. */
+    int line;
+    InputLoc sectionLoc;
+    char *tokstart, *tokend;
+    int column;
+    char *lastnl;
+
+    /* Set by machine statements, these persist from section to section
+     * allowing for unnamed sections. */
+    Parser *parser;
+    bool ignoreSection;
+    IncludeStack includeStack;
+
+    /* This is set if ragel has already emitted an error stating that
+     * no section name has been seen and thus no parser exists. */
+    bool parserExistsError;
+
+    /* This is for inline code. By default it is on. It goes off for
+     * statements and values in inline blocks which are parsed. */
+    bool whitespaceOn;
+
+    /* Keeps a record of the previous token sent to the section parser. */
+    int lastToken;
+};
+
+#endif /* _RLSCAN_H */
diff --git a/contrib/tools/ragel5/ragel/xmlcodegen.cpp b/contrib/tools/ragel5/ragel/xmlcodegen.cpp
new file mode 100644
index 0000000000..021c97e87d
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/xmlcodegen.cpp
@@ -0,0 +1,713 @@
+/*
+ * Copyright 2005, 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include "ragel.h"
+#include "xmlcodegen.h"
+#include "parsedata.h"
+#include "fsmgraph.h"
+#include <string.h>
+
+using namespace std;
+
+/* Binds the generator to a machine name, its parse data, the state machine
+ * and the stream the XML is written to. Action table ids start at zero. */
+XMLCodeGen::XMLCodeGen( char *fsmName, ParseData *pd, FsmAp *fsm,
+        std::ostream &out )
+:
+    fsmName(fsmName),
+    pd(pd),
+    fsm(fsm),
+    out(out),
+    nextActionTableId(0)
+{
+}
+
+
+/* Assigns consecutive ids to every action that has at least one reference
+ * and writes those actions inside an <action_list> element. */
+void XMLCodeGen::writeActionList()
+{
+    /* Determine which actions to write. */
+    int nextActionId = 0;
+    for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
+        if ( act->numRefs() > 0 || act->numCondRefs > 0 )
+            act->actionId = nextActionId++;
+    }
+
+    /* Write the list. */
+    out << " <action_list length=\"" << nextActionId << "\">\n";
+    for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) {
+        if ( act->actionId >= 0 )
+            writeAction( act );
+    }
+    out << " </action_list>\n";
+}
+
+/* Writes the reduced action tables in id order. Each table is written as
+ * the space-separated ids of its actions. */
+void XMLCodeGen::writeActionTableList()
+{
+    /* Must first order the action tables based on their id. */
+    int numTables = nextActionTableId;
+    RedActionTable **tables = new RedActionTable*[numTables];
+    for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ )
+        tables[at->id] = at;
+
+    out << " <action_table_list length=\"" << numTables << "\">\n";
+    for ( int t = 0; t < numTables; t++ ) {
+        out << " <action_table id=\"" << t << "\" length=\"" <<
+                tables[t]->key.length() << "\">";
+        for ( ActionTable::Iter atel = tables[t]->key; atel.lte(); atel++ ) {
+            out << atel->value->actionId;
+            if ( ! atel.last() )
+                out << " ";
+        }
+        out << "</action_table>\n";
+    }
+    out << " </action_table_list>\n";
+
+    delete[] tables;
+}
+
+/* Collects the distinct non-empty action tables used by the states (to-state,
+ * from-state, EOF) and by their transitions into actionTableMap. A table
+ * gets the next free id the first time it is inserted. */
+void XMLCodeGen::reduceActionTables()
+{
+    /* Reduce the actions tables to a set. */
+    for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) {
+        RedActionTable *actionTable = 0;
+
+        /* Reduce To State Actions. */
+        if ( st->toStateActionTable.length() > 0 ) {
+            if ( actionTableMap.insert( st->toStateActionTable, &actionTable ) )
+                actionTable->id = nextActionTableId++;
+        }
+
+        /* Reduce From State Actions. */
+        if ( st->fromStateActionTable.length() > 0 ) {
+            if ( actionTableMap.insert( st->fromStateActionTable, &actionTable ) )
+                actionTable->id = nextActionTableId++;
+        }
+
+        /* Reduce EOF actions. */
+        if ( st->eofActionTable.length() > 0 ) {
+            if ( actionTableMap.insert( st->eofActionTable, &actionTable ) )
+                actionTable->id = nextActionTableId++;
+        }
+
+        /* Loop the transitions and reduce their actions. */
+        for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
+            if ( trans->actionTable.length() > 0 ) {
+                if ( actionTableMap.insert( trans->actionTable, &actionTable ) )
+                    actionTable->id = nextActionTableId++;
+            }
+        }
+    }
+}
+
+/* Appends the key range to outList, unless the transition has neither a
+ * target state nor any actions. */
+void XMLCodeGen::appendTrans( TransListVect &outList, Key lowKey,
+        Key highKey, TransAp *trans )
+{
+    if ( trans->toState != 0 || trans->actionTable.length() > 0 )
+        outList.append( TransEl( lowKey, highKey, trans ) );
+}
+
+/* Writes a key value, as unsigned when the alphabet is not signed. */
+void XMLCodeGen::writeKey( Key key )
+{
+    if ( keyOps->isSigned )
+        out << key.getVal();
+    else
+        out << (unsigned long) key.getVal();
+}
+
+/* Writes one <t> element: low key, high key, target state number and action
+ * table id. A missing target or action table is written as "x". */
+void XMLCodeGen::writeTrans( Key lowKey, Key highKey, TransAp *trans )
+{
+    /* First reduce the action. */
+    RedActionTable *actionTable = 0;
+    if ( trans->actionTable.length() > 0 )
+        actionTable = actionTableMap.find( trans->actionTable );
+
+    /* Write the transition. */
+    out << " <t>";
+    writeKey( lowKey );
+    out << " ";
+    writeKey( highKey );
+
+    if ( trans->toState != 0 )
+        out << " " << trans->toState->alg.stateNum;
+    else
+        out << " x";
+
+    if ( actionTable != 0 )
+        out << " " << actionTable->id;
+    else
+        out << " x";
+    out << "</t>\n";
+}
+
+/* Writes the <trans_list> of a state. Only transitions accepted by
+ * appendTrans() are counted and written. */
+void XMLCodeGen::writeTransList( StateAp *state )
+{
+    TransListVect outList;
+
+    /* A state without out-ranges contributes an empty list. */
+    if ( state->outList.length() > 0 ) {
+        /* Loop each source range. */
+        for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+            /* Reduce the transition. If it reduced to anything then add it. */
+            appendTrans( outList, trans->lowKey, trans->highKey, trans );
+        }
+    }
+
+    out << " <trans_list length=\"" << outList.length() << "\">\n";
+    for ( TransListVect::Iter tvi = outList; tvi.lte(); tvi++ )
+        writeTrans( tvi->lowKey, tvi->highKey, tvi->value );
+    out << " </trans_list>\n";
+}
+
+/* Writes an <lm_switch> with one <sub_action> for every longest-match part
+ * that is part of the select and has an action. */
+void XMLCodeGen::writeLmSwitch( InlineItem *item )
+{
+    LongestMatch *longestMatch = item->longestMatch;
+
+    out << "<lm_switch";
+    if ( longestMatch->lmSwitchHandlesError )
+        out << " handles_error=\"t\"";
+    out << ">\n";
+
+    for ( LmPartList::Iter lmi = *longestMatch->longestMatchList; lmi.lte(); lmi++ ) {
+        if ( lmi->inLmSelect && lmi->action != 0 ) {
+            /* Open the action. Write it with the context that sets up _p
+             * when doing control flow changes from inside the machine. */
+            out << " <sub_action id=\"" << lmi->longestMatchId << "\">";
+            writeInlineList( lmi->action->inlineList, item );
+            out << "</sub_action>\n";
+        }
+    }
+
+    out << " </lm_switch><exec><get_tokend></get_tokend></exec>";
+}
+
+/* Writes a Text item escaped for XML. Adjacent Text items share a single
+ * <text> element: the tag is opened only by the first item of a run and
+ * closed only by the last. */
+void XMLCodeGen::writeText( InlineItem *item )
+{
+    if ( item->prev == 0 || item->prev->type != InlineItem::Text )
+        out << "<text>";
+    xmlEscapeHost( out, item->data, strlen(item->data) );
+    if ( item->next == 0 || item->next->type != InlineItem::Text )
+        out << "</text>";
+}
+
+/* Writes a control flow item (goto, call, next, break, ret and their
+ * expression forms). When a longest-match context is given, the item is
+ * wrapped in a <sub_action> that first executes from the token end. */
+void XMLCodeGen::writeCtrlFlow( InlineItem *item, InlineItem *context )
+{
+    if ( context != 0 ) {
+        out << "<sub_action>";
+
+        switch ( context->type ) {
+        case InlineItem::LmOnLast:
+            out << "<exec><get_tokend></get_tokend></exec>";
+            break;
+        case InlineItem::LmOnNext:
+            out << "<exec><get_tokend></get_tokend></exec>";
+            break;
+        case InlineItem::LmOnLagBehind:
+            out << "<exec><get_tokend></get_tokend></exec>";
+            break;
+        case InlineItem::LmSwitch:
+            out << "<exec><get_tokend></get_tokend></exec>";
+            break;
+        default: break;
+        }
+    }
+
+    switch ( item->type ) {
+    case InlineItem::Goto:
+        writeGoto( item, context );
+        break;
+    case InlineItem::GotoExpr:
+        writeGotoExpr( item, context );
+        break;
+    case InlineItem::Call:
+        writeCall( item, context );
+        break;
+    case InlineItem::CallExpr:
+        writeCallExpr( item, context );
+        break;
+    case InlineItem::Next:
+        writeNext( item, context );
+        break;
+    case InlineItem::NextExpr:
+        writeNextExpr( item, context );
+        break;
+    case InlineItem::Break:
+        out << "<break></break>";
+        break;
+    case InlineItem::Ret:
+        out << "<ret></ret>";
+        break;
+    default: break;
+    }
+
+    if ( context != 0 )
+        out << "</sub_action>";
+}
+
+/* Writes a Hold or Exec item. Inside an LmOnNext, LmOnLagBehind or LmSwitch
+ * context the token-end variants (<holdte>, <execte>) are written. */
+void XMLCodeGen::writePtrMod( InlineItem *item, InlineItem *context )
+{
+    if ( context != 0 && ( context->type == InlineItem::LmOnNext ||
+            context->type == InlineItem::LmOnLagBehind ||
+            context->type == InlineItem::LmSwitch ) )
+    {
+        switch ( item->type ) {
+        case InlineItem::Hold:
+            out << "<holdte></holdte>";
+            break;
+        case InlineItem::Exec:
+            writeActionExecTE( item );
+            break;
+        default: break;
+        }
+    }
+    else {
+        switch ( item->type ) {
+        case InlineItem::Hold:
+            out << "<hold></hold>";
+            break;
+        case InlineItem::Exec:
+            writeActionExec( item );
+            break;
+        default: break;
+        }
+    }
+}
+
+
+/* Writes <goto> with the target state number, or -1 when only a section
+ * subset is being generated. */
+void XMLCodeGen::writeGoto( InlineItem *item, InlineItem *context )
+{
+    if ( pd->generatingSectionSubset )
+        out << "<goto>-1</goto>";
+    else {
+        EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
+        out << "<goto>" << targ->value->alg.stateNum << "</goto>";
+    }
+}
+
+/* Writes <call> with the target state number, or -1 when only a section
+ * subset is being generated. */
+void XMLCodeGen::writeCall( InlineItem *item, InlineItem *context )
+{
+    if ( pd->generatingSectionSubset )
+        out << "<call>-1</call>";
+    else {
+        EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
+        out << "<call>" << targ->value->alg.stateNum << "</call>";
+    }
+}
+
+/* Writes <next> with the target state number, or -1 when only a section
+ * subset is being generated. */
+void XMLCodeGen::writeNext( InlineItem *item, InlineItem *context )
+{
+    if ( pd->generatingSectionSubset )
+        out << "<next>-1</next>";
+    else {
+        EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
+        out << "<next>" << targ->value->alg.stateNum << "</next>";
+    }
+}
+
+/* Writes the item's children inside <goto_expr>. */
+void XMLCodeGen::writeGotoExpr( InlineItem *item, InlineItem *context )
+{
+    out << "<goto_expr>";
+    writeInlineList( item->children, 0 );
+    out << "</goto_expr>";
+}
+
+/* Writes the item's children inside <call_expr>. */
+void XMLCodeGen::writeCallExpr( InlineItem *item, InlineItem *context )
+{
+    out << "<call_expr>";
+    writeInlineList( item->children, 0 );
+    out << "</call_expr>";
+}
+
+/* Writes the item's children inside <next_expr>. */
+void XMLCodeGen::writeNextExpr( InlineItem *item, InlineItem *context )
+{
+    out << "<next_expr>";
+    writeInlineList( item->children, 0 );
+    out << "</next_expr>";
+}
+
+/* Writes <entry> with the target state number, or -1 when only a section
+ * subset is being generated. */
+void XMLCodeGen::writeEntry( InlineItem * item )
+{
+    if ( pd->generatingSectionSubset )
+        out << "<entry>-1</entry>";
+    else {
+        EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id );
+        out << "<entry>" << targ->value->alg.stateNum << "</entry>";
+    }
+}
+
+/* Writes the item's children inside <exec>. */
+void XMLCodeGen::writeActionExec( InlineItem *item )
+{
+    out << "<exec>";
+    writeInlineList( item->children, 0 );
+    out << "</exec>";
+}
+
+/* Writes the item's children inside <execte>. */
+void XMLCodeGen::writeActionExecTE( InlineItem *item )
+{
+    out << "<execte>";
+    writeInlineList( item->children, 0 );
+    out << "</execte>";
+}
+
+/* Writes the LmOnLast sequence: set token end to 1, the part's action (if
+ * any) as a <sub_action>, then an exec from the token end. */
+void XMLCodeGen::writeLmOnLast( InlineItem *item )
+{
+    out << "<set_tokend>1</set_tokend>";
+    if ( item->longestMatchPart->action != 0 ) {
+        out << "<sub_action>";
+        writeInlineList( item->longestMatchPart->action->inlineList, item );
+        out << "</sub_action>";
+    }
+    out << "<exec><get_tokend></get_tokend></exec>";
+}
+
+/* Writes the LmOnNext sequence: set token end to 0, the part's action (if
+ * any) as a <sub_action>, then an exec from the token end. */
+void XMLCodeGen::writeLmOnNext( InlineItem *item )
+{
+    out << "<set_tokend>0</set_tokend>";
+    if ( item->longestMatchPart->action != 0 ) {
+        out << "<sub_action>";
+        writeInlineList( item->longestMatchPart->action->inlineList, item );
+        out << "</sub_action>";
+    }
+    out << "<exec><get_tokend></get_tokend></exec>";
+}
+
+/* Writes the LmOnLagBehind sequence: the part's action (if any) as a
+ * <sub_action>, then an exec from the token end. */
+void XMLCodeGen::writeLmOnLagBehind( InlineItem *item )
+{
+    if ( item->longestMatchPart->action != 0 ) {
+        out << "<sub_action>";
+        writeInlineList( item->longestMatchPart->action->inlineList, item );
+        out << "</sub_action>";
+    }
+    out << "<exec><get_tokend></get_tokend></exec>";
+}
+
+
+/* Writes every item of an inline list, dispatching on the item type.
+ * `context` is the enclosing longest-match item, or 0 when there is none. */
+void XMLCodeGen::writeInlineList( InlineList *inlineList, InlineItem *context )
+{
+    for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
+        switch ( item->type ) {
+        case InlineItem::Text:
+            writeText( item );
+            break;
+        case InlineItem::Goto: case InlineItem::GotoExpr:
+        case InlineItem::Call: case InlineItem::CallExpr:
+        case InlineItem::Next: case InlineItem::NextExpr:
+        case InlineItem::Break: case InlineItem::Ret:
+            writeCtrlFlow( item, context );
+            break;
+        case InlineItem::PChar:
+            out << "<pchar></pchar>";
+            break;
+        case InlineItem::Char:
+            out << "<char></char>";
+            break;
+        case InlineItem::Curs:
+            out << "<curs></curs>";
+            break;
+        case InlineItem::Targs:
+            out << "<targs></targs>";
+            break;
+        case InlineItem::Entry:
+            writeEntry( item );
+            break;
+
+        case InlineItem::Hold:
+        case InlineItem::Exec:
+            writePtrMod( item, context );
+            break;
+
+        case InlineItem::LmSwitch:
+            writeLmSwitch( item );
+            break;
+        case InlineItem::LmSetActId:
+            out << "<set_act>" <<
+                    item->longestMatchPart->longestMatchId <<
+                    "</set_act>";
+            break;
+        case InlineItem::LmSetTokEnd:
+            out << "<set_tokend>1</set_tokend>";
+            break;
+        case InlineItem::LmOnLast:
+            writeLmOnLast( item );
+            break;
+        case InlineItem::LmOnNext:
+            writeLmOnNext( item );
+            break;
+        case InlineItem::LmOnLagBehind:
+            writeLmOnLagBehind( item );
+            break;
+        case InlineItem::LmInitAct:
+            out << "<init_act></init_act>";
+            break;
+        case InlineItem::LmInitTokStart:
+            out << "<init_tokstart></init_tokstart>";
+            break;
+        case InlineItem::LmSetTokStart:
+            out << "<set_tokstart></set_tokstart>";
+            break;
+        }
+    }
+}
+
+/* Writes one <action> element with its id, optional name, source location
+ * and inline code. */
+void XMLCodeGen::writeAction( Action *action )
+{
+    out << " <action id=\"" << action->actionId << "\"";
+    if ( action->name != 0 )
+        out << " name=\"" << action->name << "\"";
+    out << " line=\"" << action->loc.line << "\" col=\"" << action->loc.col << "\">";
+    writeInlineList( action->inlineList, 0 );
+    out << "</action>\n";
+}
+
+/* Writes `len` bytes starting at `data` to `out`, replacing the XML markup
+ * characters '<', '>' and '&' with their entity references so that host
+ * code cannot break the surrounding XML. All other bytes pass through
+ * unchanged. */
+void xmlEscapeHost( std::ostream &out, char *data, int len )
+{
+    char *end = data + len;
+    while ( data != end ) {
+        switch ( *data ) {
+        case '<': out << "&lt;"; break;
+        case '>': out << "&gt;"; break;
+        case '&': out << "&amp;"; break;
+        default: out << *data; break;
+        }
+        data += 1;
+    }
+}
+
+/* Writes <state_actions> with the to-state, from-state and EOF action table
+ * ids of a state ("x" for a missing one). Nothing is written when the state
+ * has none of the three. */
+void XMLCodeGen::writeStateActions( StateAp *state )
+{
+    RedActionTable *toStateActions = 0;
+    if ( state->toStateActionTable.length() > 0 )
+        toStateActions = actionTableMap.find( state->toStateActionTable );
+
+    RedActionTable *fromStateActions = 0;
+    if ( state->fromStateActionTable.length() > 0 )
+        fromStateActions = actionTableMap.find( state->fromStateActionTable );
+
+    RedActionTable *eofActions = 0;
+    if ( state->eofActionTable.length() > 0 )
+        eofActions = actionTableMap.find( state->eofActionTable );
+
+    if ( toStateActions != 0 || fromStateActions != 0 || eofActions != 0 ) {
+        out << " <state_actions>";
+        if ( toStateActions != 0 )
+            out << toStateActions->id;
+        else
+            out << "x";
+
+        if ( fromStateActions != 0 )
+            out << " " << fromStateActions->id;
+        else
+            out << " x";
+
+        if ( eofActions != 0 )
+            out << " " << eofActions->id;
+        else
+            out << " x";
+        /* The closing tag is written on every path, not only in the else. */
+        out << "</state_actions>\n";
+    }
+}
+
+/* Writes the <cond_list> of a state: one <c> per condition range with its
+ * low key, high key and condition space id. Skipped when the list is empty. */
+void XMLCodeGen::writeStateConditions( StateAp *state )
+{
+    if ( state->stateCondList.length() > 0 ) {
+        out << " <cond_list length=\"" << state->stateCondList.length() << "\">\n";
+        for ( StateCondList::Iter scdi = state->stateCondList; scdi.lte(); scdi++ ) {
+            out << " <c>";
+            writeKey( scdi->lowKey );
+            out << " ";
+            writeKey( scdi->highKey );
+            out << " ";
+            out << scdi->condSpace->condSpaceId;
+            out << "</c>\n";
+        }
+        out << " </cond_list>\n";
+    }
+}
+
+/* Writes the <state_list>: every state with its id, final flag, actions,
+ * conditions and transitions. */
+void XMLCodeGen::writeStateList()
+{
+    /* Write the list of states. */
+    out << " <state_list length=\"" << fsm->stateList.length() << "\">\n";
+    for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) {
+        out << " <state id=\"" << st->alg.stateNum << "\"";
+        if ( st->isFinState() )
+            out << " final=\"t\"";
+        out << ">\n";
+
+        writeStateActions( st );
+        writeStateConditions( st );
+        writeTransList( st );
+
+        out << " </state>\n";
+
+        if ( !st.last() )
+            out << "\n";
+    }
+    out << " </state_list>\n";
+}
+
+/* Writes the qualified name of a name instantiation: the names of its
+ * ancestors and itself, outermost first, joined by '_'. Unnamed levels are
+ * skipped. Returns true when anything was written. */
+bool XMLCodeGen::writeNameInst( NameInst *nameInst )
+{
+    bool written = false;
+    if ( nameInst->parent != 0 )
+        written = writeNameInst( nameInst->parent );
+
+    if ( nameInst->name != 0 ) {
+        if ( written )
+            out << '_';
+        out << nameInst->name;
+        written = true;
+    }
+
+    return written;
+}
+
+/* Writes <entry_points> with one <entry> per named entry point. Written
+ * when there are entry points or when the error state is required. */
+void XMLCodeGen::writeEntryPoints()
+{
+    /* List of entry points other than start state. */
+    if ( fsm->entryPoints.length() > 0 || pd->lmRequiresErrorState ) {
+        out << " <entry_points";
+        if ( pd->lmRequiresErrorState )
+            out << " error=\"t\"";
+        out << ">\n";
+        for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) {
+            /* Get the name instantiation from nameIndex. */
+            NameInst *nameInst = pd->nameIndex[en->key];
+            StateAp *state = en->value;
+            out << " <entry name=\"";
+            writeNameInst( nameInst );
+            out << "\">" << state->alg.stateNum << "</entry>\n";
+        }
+        out << " </entry_points>\n";
+    }
+}
+
+/* Writes the <machine> element: actions, action tables, conditions, start
+ * and error states, entry points and the state list. */
+void XMLCodeGen::writeMachine()
+{
+    /* Open the machine. */
+    out << " <machine>\n";
+
+    /* Action tables. */
+    reduceActionTables();
+
+    writeActionList();
+    writeActionTableList();
+    writeConditions();
+
+    /* Start state. */
+    GraphDictEl *mainEl = pd->graphDict.find( mainMachine );
+    if ( mainEl != 0 ) {
+        out << " <start_state>" << fsm->startState->alg.stateNum <<
+            "</start_state>\n";
+    }
+
+    /* Error state. */
+    if ( fsm->errState != 0 ) {
+        out << " <error_state>" << fsm->errState->alg.stateNum <<
+            "</error_state>\n";
+    }
+
+    writeEntryPoints();
+    writeStateList();
+
+    out << " </machine>\n";
+}
+
+/* Writes the alphabet type as its index in the host language's type table. */
+void XMLCodeGen::writeAlphType()
+{
+    out << " <alphtype>" <<
+        (keyOps->alphType - hostLang->hostTypes) << "</alphtype>\n";
+}
+
+/* Writes the user's getkey expression inside <getkey>. */
+void XMLCodeGen::writeGetKeyExpr()
+{
+    out << " <getkey>";
+    writeInlineList( pd->getKeyExpr, 0 );
+    out << "</getkey>\n";
+}
+
+/* Writes the user's access expression inside <access>. */
+void XMLCodeGen::writeAccessExpr()
+{
+    out << " <access>";
+    writeInlineList( pd->accessExpr, 0 );
+    out << "</access>\n";
+}
+
+/* Writes the user's current-state expression inside <curstate>. */
+void XMLCodeGen::writeCurStateExpr()
+{
+    out << " <curstate>";
+    writeInlineList( pd->curStateExpr, 0 );
+    out << "</curstate>\n";
+}
+
+/* Numbers the condition spaces and writes them as a <cond_space_list>.
+ * Each entry carries its base key followed by the ids of its conditions.
+ * Skipped when there are no condition spaces. */
+void XMLCodeGen::writeConditions()
+{
+    if ( condData->condSpaceMap.length() > 0 ) {
+        long nextCondSpaceId = 0;
+        for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ )
+            cs->condSpaceId = nextCondSpaceId++;
+
+        out << " <cond_space_list length=\"" << condData->condSpaceMap.length() << "\">\n";
+        for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) {
+            out << " <cond_space id=\"" << cs->condSpaceId <<
+                "\" length=\"" << cs->condSet.length() << "\">";
+            writeKey( cs->baseKey );
+            for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ )
+                out << " " << (*csi)->actionId;
+            out << "</cond_space>\n";
+        }
+        out << " </cond_space_list>\n";
+    }
+}
+
+/* Writes the <exports> list: one <ex> per exported name with its key.
+ * Skipped when nothing is exported. */
+void XMLCodeGen::writeExports()
+{
+    if ( pd->exportList.length() > 0 ) {
+        out << " <exports>\n";
+        for ( ExportList::Iter exp = pd->exportList; exp.lte(); exp++ ) {
+            out << " <ex name=\"" << exp->name << "\">";
+            writeKey( exp->key );
+            out << "</ex>\n";
+        }
+        out << " </exports>\n";
+    }
+}
+
+/* Entry point: writes the complete <ragel_def> element for the machine. */
+void XMLCodeGen::writeXML()
+{
+    /* Open the definition. */
+    out << "<ragel_def name=\"" << fsmName << "\">\n";
+    writeAlphType();
+
+    if ( pd->getKeyExpr != 0 )
+        writeGetKeyExpr();
+
+    if ( pd->accessExpr != 0 )
+        writeAccessExpr();
+
+    if ( pd->curStateExpr != 0 )
+        writeCurStateExpr();
+
+    writeExports();
+
+    writeMachine();
+
+    out <<
+        "</ragel_def>\n";
+}
+
diff --git a/contrib/tools/ragel5/ragel/xmlcodegen.h b/contrib/tools/ragel5/ragel/xmlcodegen.h
new file mode 100644
index 0000000000..99b985395a
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/xmlcodegen.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2005, 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XMLDOTGEN_H
+#define _XMLDOTGEN_H
+
+#include <iostream>
+#include "avltree.h"
+#include "fsmgraph.h"
+#include "parsedata.h"
+
+/* Forwards.
*/
+struct TransAp;
+struct FsmAp;
+struct ParseData;
+
+/* Element of ActionTableMap: one distinct action table together with the id
+ * it is written under. The id starts at 0 and is assigned by whoever
+ * inserts the element. */
+struct RedActionTable
+:
+    public AvlTreeEl<RedActionTable>
+{
+    RedActionTable( const ActionTable &key )
+    :
+        key(key),
+        id(0)
+    { }
+
+    /* Key accessor used by the AVL tree. */
+    const ActionTable &getKey()
+        { return key; }
+
+    ActionTable key;
+    int id;
+};
+
+/* Set of distinct action tables, ordered by CmpActionTable. */
+typedef AvlTree<RedActionTable, ActionTable, CmpActionTable> ActionTableMap;
+
+/* Cursor over a chain of transitions that caches the current transition's
+ * key range and its successor. */
+struct NextRedTrans
+{
+    Key lowKey, highKey;
+    TransAp *trans;
+    TransAp *next;
+
+    /* Refreshes next/lowKey/highKey from trans. When trans is 0 the three
+     * members are left as they were.
+     * NOTE(review): after construction with a null pointer `next` is never
+     * written, so increment() would then copy an uninitialized pointer;
+     * confirm callers stop once trans is 0. */
+    void load() {
+        if ( trans != 0 ) {
+            next = trans->next;
+            lowKey = trans->lowKey;
+            highKey = trans->highKey;
+        }
+    }
+
+    NextRedTrans( TransAp *t ) {
+        trans = t;
+        load();
+    }
+
+    /* Advances to the cached successor and reloads. */
+    void increment() {
+        trans = next;
+        load();
+    }
+};
+
+/* Writes a state machine and its parse data as a <ragel_def> XML element
+ * to an output stream. writeXML() is the only public operation. */
+class XMLCodeGen
+{
+public:
+    XMLCodeGen( char *fsmName, ParseData *pd, FsmAp *fsm, std::ostream &out );
+    void writeXML( );
+
+private:
+    void appendTrans( TransListVect &outList, Key lowKey, Key highKey, TransAp *trans );
+    void writeStateActions( StateAp *state );
+    void writeStateList();
+    void writeStateConditions( StateAp *state );
+
+    /* Writers for the individual inline-code items. */
+    void writeKey( Key key );
+    void writeText( InlineItem *item );
+    void writeCtrlFlow( InlineItem *item, InlineItem *context );
+    void writePtrMod( InlineItem *item, InlineItem *context );
+    void writeGoto( InlineItem *item, InlineItem *context );
+    void writeGotoExpr( InlineItem *item, InlineItem *context );
+    void writeCall( InlineItem *item, InlineItem *context );
+    void writeCallExpr( InlineItem *item, InlineItem *context );
+    void writeNext( InlineItem *item, InlineItem *context );
+    void writeNextExpr( InlineItem *item, InlineItem *context );
+    void writeEntry( InlineItem *item );
+    /* NOTE(review): no definition of writeLmSetActId appears in the
+     * xmlcodegen.cpp hunk of this patch; presumably unused. */
+    void writeLmSetActId( InlineItem *item );
+    void writeLmOnLast( InlineItem *item );
+    void writeLmOnNext( InlineItem *item );
+    void writeLmOnLagBehind( InlineItem *item );
+
+    void writeExports();
+    bool writeNameInst( NameInst *nameInst );
+    void writeEntryPoints();
+    void writeGetKeyExpr();
+    void writeAccessExpr();
+    void writeCurStateExpr();
+    void writeConditions();
+    void writeInlineList( InlineList *inlineList, InlineItem *context );
+    void writeAlphType();
+    void writeActionList();
+    void writeActionTableList();
+    /* NOTE(review): no definition of reduceTrans appears in the
+     * xmlcodegen.cpp hunk of this patch; presumably unused. */
+    void reduceTrans( TransAp *trans );
+    void reduceActionTables();
+    void writeTransList( StateAp *state );
+    void writeTrans( Key lowKey, Key highKey, TransAp *defTrans );
+    void writeAction( Action *action );
+    void writeLmSwitch( InlineItem *item );
+    void writeMachine();
+    void writeActionExec( InlineItem *item );
+    void writeActionExecTE( InlineItem *item );
+
+    char *fsmName;
+    ParseData *pd;
+    FsmAp *fsm;
+    std::ostream &out;
+    /* Distinct action tables and the next id to hand out for a new one. */
+    ActionTableMap actionTableMap;
+    int nextActionTableId;
+};
+
+
+#endif /* _XMLDOTGEN_H */
diff --git a/contrib/tools/ragel5/ragel/ya.make b/contrib/tools/ragel5/ragel/ya.make
new file mode 100644
index 0000000000..6966321b7c
--- /dev/null
+++ b/contrib/tools/ragel5/ragel/ya.make
@@ -0,0 +1,26 @@
+PROGRAM(ragel5)
+
+NO_UTIL()
+NO_COMPILER_WARNINGS()
+
+PEERDIR(
+    contrib/tools/ragel5/aapl
+    contrib/tools/ragel5/common
+)
+
+SRCS(
+    fsmap.cpp
+    fsmattach.cpp
+    fsmbase.cpp
+    fsmgraph.cpp
+    fsmmin.cpp
+    fsmstate.cpp
+    main.cpp
+    parsedata.cpp
+    parsetree.cpp
+    rlparse.cpp
+    rlscan.cpp
+    xmlcodegen.cpp
+)
+
+END()
diff --git a/contrib/tools/ragel5/redfsm/gendata.cpp b/contrib/tools/ragel5/redfsm/gendata.cpp
new file mode 100644
index 0000000000..b0893ccdc2
--- /dev/null
+++ b/contrib/tools/ragel5/redfsm/gendata.cpp
@@ -0,0 +1,717 @@
+/*
+ * Copyright 2005-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "gendata.h" +#include <iostream> + +using std::cerr; +using std::endl; + +CodeGenData::CodeGenData( ostream &out ) +: + sourceFileName(0), + fsmName(0), + out(out), + redFsm(0), + allActions(0), + allActionTables(0), + allConditions(0), + allCondSpaces(0), + allStates(0), + nameIndex(0), + startState(-1), + errState(-1), + getKeyExpr(0), + accessExpr(0), + curStateExpr(0), + wantComplete(0), + hasLongestMatch(false), + codeGenErrCount(0), + hasEnd(true), + dataPrefix(true), + writeFirstFinal(true), + writeErr(true) +{} + + +void CodeGenData::createMachine() +{ + redFsm = new RedFsmAp(); +} + +void CodeGenData::initActionList( unsigned long length ) +{ + allActions = new Action[length]; + for ( unsigned long a = 0; a < length; a++ ) + actionList.append( allActions+a ); +} + +void CodeGenData::newAction( int anum, char *name, int line, + int col, InlineList *inlineList ) +{ + allActions[anum].actionId = anum; + allActions[anum].name = name; + allActions[anum].loc.line = line; + allActions[anum].loc.col = col; + allActions[anum].inlineList = inlineList; +} + +void CodeGenData::initActionTableList( unsigned long length ) +{ + allActionTables = new RedAction[length]; +} + +void CodeGenData::initStateList( unsigned long length ) +{ + allStates = new RedStateAp[length]; + for ( unsigned long s = 0; s < length; s++ ) + redFsm->stateList.append( allStates+s ); + + /* We get the start state as an offset, set the pointer now. 
*/ + if ( startState >= 0 ) + redFsm->startState = allStates + startState; + if ( errState >= 0 ) + redFsm->errState = allStates + errState; + for ( EntryIdVect::Iter en = entryPointIds; en.lte(); en++ ) + redFsm->entryPoints.insert( allStates + *en ); + + /* The nextStateId is no longer used to assign state ids (they come in set + * from the frontend now), however generation code still depends on it. + * Should eventually remove this variable. */ + redFsm->nextStateId = redFsm->stateList.length(); +} + +void CodeGenData::setStartState( unsigned long startState ) +{ + this->startState = startState; +} + +void CodeGenData::setErrorState( unsigned long errState ) +{ + this->errState = errState; +} + +void CodeGenData::addEntryPoint( char *name, unsigned long entryState ) +{ + entryPointIds.append( entryState ); + entryPointNames.append( name ); +} + +void CodeGenData::initTransList( int snum, unsigned long length ) +{ + /* Could preallocate the out range to save time growing it. For now do + * nothing. */ +} + +void CodeGenData::newTrans( int snum, int tnum, Key lowKey, + Key highKey, long targ, long action ) +{ + /* Get the current state and range. */ + RedStateAp *curState = allStates + snum; + RedTransList &destRange = curState->outRange; + + if ( curState == redFsm->errState ) + return; + + /* Make the new transitions. */ + RedStateAp *targState = targ >= 0 ? (allStates + targ) : + wantComplete ? redFsm->getErrorState() : 0; + RedAction *actionTable = action >= 0 ? (allActionTables + action) : 0; + RedTransAp *trans = redFsm->allocateTrans( targState, actionTable ); + RedTransEl transEl( lowKey, highKey, trans ); + + if ( wantComplete ) { + /* If the machine is to be complete then we need to fill any gaps with + * the error transitions. */ + if ( destRange.length() == 0 ) { + /* Range is currently empty. */ + if ( keyOps->minKey < lowKey ) { + /* The first range doesn't start at the low end. 
*/ + Key fillHighKey = lowKey; + fillHighKey.decrement(); + + /* Create the filler with the state's error transition. */ + RedTransEl newTel( keyOps->minKey, fillHighKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + else { + /* The range list is not empty, get the the last range. */ + RedTransEl *last = &destRange[destRange.length()-1]; + Key nextKey = last->highKey; + nextKey.increment(); + if ( nextKey < lowKey ) { + /* There is a gap to fill. Make the high key. */ + Key fillHighKey = lowKey; + fillHighKey.decrement(); + + /* Create the filler with the state's error transtion. */ + RedTransEl newTel( nextKey, fillHighKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + } + + /* Filler taken care of. Append the range. */ + destRange.append( RedTransEl( lowKey, highKey, trans ) ); +} + +void CodeGenData::finishTransList( int snum ) +{ + /* Get the current state and range. */ + RedStateAp *curState = allStates + snum; + RedTransList &destRange = curState->outRange; + + if ( curState == redFsm->errState ) + return; + + /* If building a complete machine we may need filler on the end. */ + if ( wantComplete ) { + /* Check if there are any ranges already. */ + if ( destRange.length() == 0 ) { + /* Fill with the whole alphabet. */ + /* Add the range on the lower and upper bound. */ + RedTransEl newTel( keyOps->minKey, keyOps->maxKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + else { + /* Get the last and check for a gap on the end. */ + RedTransEl *last = &destRange[destRange.length()-1]; + if ( last->highKey < keyOps->maxKey ) { + /* Make the high key. */ + Key fillLowKey = last->highKey; + fillLowKey.increment(); + + /* Create the new range with the error trans and append it. 
*/ + RedTransEl newTel( fillLowKey, keyOps->maxKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + } +} + +void CodeGenData::setId( int snum, int id ) +{ + RedStateAp *curState = allStates + snum; + curState->id = id; +} + +void CodeGenData::setFinal( int snum ) +{ + RedStateAp *curState = allStates + snum; + curState->isFinal = true; +} + + +void CodeGenData::setStateActions( int snum, long toStateAction, + long fromStateAction, long eofAction ) +{ + RedStateAp *curState = allStates + snum; + if ( toStateAction >= 0 ) + curState->toStateAction = allActionTables + toStateAction; + if ( fromStateAction >= 0 ) + curState->fromStateAction = allActionTables + fromStateAction; + if ( eofAction >= 0 ) + curState->eofAction = allActionTables + eofAction; +} + +void CodeGenData::resolveTargetStates( InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Goto: case InlineItem::Call: + case InlineItem::Next: case InlineItem::Entry: + item->targState = allStates + item->targId; + break; + default: + break; + } + + if ( item->children != 0 ) + resolveTargetStates( item->children ); + } +} + +void CodeGenData::closeMachine() +{ + for ( ActionList::Iter a = actionList; a.lte(); a++ ) + resolveTargetStates( a->inlineList ); + + /* Note that even if we want a complete graph we do not give the error + * state a default transition. All machines break out of the processing + * loop when in the error state. */ + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + for ( StateCondList::Iter sci = st->stateCondList; sci.lte(); sci++ ) + st->stateCondVect.append( sci ); + } +} + + +bool CodeGenData::setAlphType( char *data ) +{ + /* FIXME: This should validate the alphabet type selection. 
*/ + HostType *alphType = hostLang->hostTypes + atoi(data); + thisKeyOps.setAlphType( alphType ); + return true; +} + +void CodeGenData::initCondSpaceList( ulong length ) +{ + allCondSpaces = new CondSpace[length]; + for ( ulong c = 0; c < length; c++ ) + condSpaceList.append( allCondSpaces + c ); +} + +void CodeGenData::newCondSpace( int cnum, int condSpaceId, Key baseKey ) +{ + CondSpace *cond = allCondSpaces + cnum; + cond->condSpaceId = condSpaceId; + cond->baseKey = baseKey; +} + +void CodeGenData::condSpaceItem( int cnum, long condActionId ) +{ + CondSpace *cond = allCondSpaces + cnum; + cond->condSet.append( allActions + condActionId ); +} + +void CodeGenData::initStateCondList( int snum, ulong length ) +{ + /* Could preallocate these, as we could with transitions. */ +} + +void CodeGenData::addStateCond( int snum, Key lowKey, Key highKey, long condNum ) +{ + RedStateAp *curState = allStates + snum; + + /* Create the new state condition. */ + StateCond *stateCond = new StateCond; + stateCond->lowKey = lowKey; + stateCond->highKey = highKey; + + /* Assign it a cond space. 
*/ + CondSpace *condSpace = allCondSpaces + condNum; + stateCond->condSpace = condSpace; + + curState->stateCondList.append( stateCond ); +} + + +CondSpace *CodeGenData::findCondSpace( Key lowKey, Key highKey ) +{ + for ( CondSpaceList::Iter cs = condSpaceList; cs.lte(); cs++ ) { + Key csHighKey = cs->baseKey; + csHighKey += keyOps->alphSize() * (1 << cs->condSet.length()); + + if ( lowKey >= cs->baseKey && highKey <= csHighKey ) + return cs; + } + return 0; +} + +Condition *CodeGenData::findCondition( Key key ) +{ + for ( ConditionList::Iter cond = conditionList; cond.lte(); cond++ ) { + Key upperKey = cond->baseKey + (1 << cond->condSet.length()); + if ( cond->baseKey <= key && key <= upperKey ) + return cond; + } + return 0; +} + +Key CodeGenData::findMaxKey() +{ + Key maxKey = keyOps->maxKey; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + assert( st->outSingle.length() == 0 ); + assert( st->defTrans == 0 ); + + long rangeLen = st->outRange.length(); + if ( rangeLen > 0 ) { + Key highKey = st->outRange[rangeLen-1].highKey; + if ( highKey > maxKey ) + maxKey = highKey; + } + } + return maxKey; +} + +void CodeGenData::findFinalActionRefs() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Rerence count out of single transitions. */ + for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 ) { + rtel->value->action->numTransRefs += 1; + for ( ActionTable::Iter item = rtel->value->action->key; item.lte(); item++ ) + item->value->numTransRefs += 1; + } + } + + /* Reference count out of range transitions. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 ) { + rtel->value->action->numTransRefs += 1; + for ( ActionTable::Iter item = rtel->value->action->key; item.lte(); item++ ) + item->value->numTransRefs += 1; + } + } + + /* Reference count default transition. 
*/ + if ( st->defTrans != 0 && st->defTrans->action != 0 ) { + st->defTrans->action->numTransRefs += 1; + for ( ActionTable::Iter item = st->defTrans->action->key; item.lte(); item++ ) + item->value->numTransRefs += 1; + } + + /* Reference count to state actions. */ + if ( st->toStateAction != 0 ) { + st->toStateAction->numToStateRefs += 1; + for ( ActionTable::Iter item = st->toStateAction->key; item.lte(); item++ ) + item->value->numToStateRefs += 1; + } + + /* Reference count from state actions. */ + if ( st->fromStateAction != 0 ) { + st->fromStateAction->numFromStateRefs += 1; + for ( ActionTable::Iter item = st->fromStateAction->key; item.lte(); item++ ) + item->value->numFromStateRefs += 1; + } + + /* Reference count EOF actions. */ + if ( st->eofAction != 0 ) { + st->eofAction->numEofRefs += 1; + for ( ActionTable::Iter item = st->eofAction->key; item.lte(); item++ ) + item->value->numEofRefs += 1; + } + } +} + +void CodeGenData::analyzeAction( Action *act, InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* Only consider actions that are referenced. */ + if ( act->numRefs() > 0 ) { + if ( item->type == InlineItem::Goto || item->type == InlineItem::GotoExpr ) + redFsm->bAnyActionGotos = true; + else if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr ) + redFsm->bAnyActionCalls = true; + else if ( item->type == InlineItem::Ret ) + redFsm->bAnyActionRets = true; + } + + /* Check for various things in regular actions. */ + if ( act->numTransRefs > 0 || act->numToStateRefs > 0 || act->numFromStateRefs > 0 ) { + /* Any returns in regular actions? */ + if ( item->type == InlineItem::Ret ) + redFsm->bAnyRegActionRets = true; + + /* Any next statements in the regular actions? */ + if ( item->type == InlineItem::Next || item->type == InlineItem::NextExpr ) + redFsm->bAnyRegNextStmt = true; + + /* Any by value control in regular actions? 
*/ + if ( item->type == InlineItem::CallExpr || item->type == InlineItem::GotoExpr ) + redFsm->bAnyRegActionByValControl = true; + + /* Any references to the current state in regular actions? */ + if ( item->type == InlineItem::Curs ) + redFsm->bAnyRegCurStateRef = true; + + if ( item->type == InlineItem::Break ) + redFsm->bAnyRegBreak = true; + + if ( item->type == InlineItem::LmSwitch && item->handlesError ) + redFsm->bAnyLmSwitchError = true; + } + + if ( item->children != 0 ) + analyzeAction( act, item->children ); + } +} + +void CodeGenData::analyzeActionList( RedAction *redAct, InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* Any next statements in the action table? */ + if ( item->type == InlineItem::Next || item->type == InlineItem::NextExpr ) + redAct->bAnyNextStmt = true; + + /* Any references to the current state. */ + if ( item->type == InlineItem::Curs ) + redAct->bAnyCurStateRef = true; + + if ( item->type == InlineItem::Break ) + redAct->bAnyBreakStmt = true; + + if ( item->children != 0 ) + analyzeActionList( redAct, item->children ); + } +} + +/* Assign ids to referenced actions. */ +void CodeGenData::assignActionIds() +{ + int nextActionId = 0; + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Only ever interested in referenced actions. */ + if ( act->numRefs() > 0 ) + act->actionId = nextActionId++; + } +} + +void CodeGenData::setValueLimits() +{ + redFsm->maxSingleLen = 0; + redFsm->maxRangeLen = 0; + redFsm->maxKeyOffset = 0; + redFsm->maxIndexOffset = 0; + redFsm->maxActListId = 0; + redFsm->maxActionLoc = 0; + redFsm->maxActArrItem = 0; + redFsm->maxSpan = 0; + redFsm->maxCondSpan = 0; + redFsm->maxFlatIndexOffset = 0; + redFsm->maxCondOffset = 0; + redFsm->maxCondLen = 0; + redFsm->maxCondSpaceId = 0; + redFsm->maxCondIndexOffset = 0; + + /* In both of these cases the 0 index is reserved for no value, so the max + * is one more than it would be if they started at 0. 
*/ + redFsm->maxIndex = redFsm->transSet.length(); + redFsm->maxCond = condSpaceList.length(); + + /* The nextStateId - 1 is the last state id assigned. */ + redFsm->maxState = redFsm->nextStateId - 1; + + for ( CondSpaceList::Iter csi = condSpaceList; csi.lte(); csi++ ) { + if ( csi->condSpaceId > redFsm->maxCondSpaceId ) + redFsm->maxCondSpaceId = csi->condSpaceId; + } + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Maximum cond length. */ + if ( st->stateCondList.length() > redFsm->maxCondLen ) + redFsm->maxCondLen = st->stateCondList.length(); + + /* Maximum single length. */ + if ( st->outSingle.length() > redFsm->maxSingleLen ) + redFsm->maxSingleLen = st->outSingle.length(); + + /* Maximum range length. */ + if ( st->outRange.length() > redFsm->maxRangeLen ) + redFsm->maxRangeLen = st->outRange.length(); + + /* The key offset index offset for the state after last is not used, skip it.. */ + if ( ! st.last() ) { + redFsm->maxCondOffset += st->stateCondList.length(); + redFsm->maxKeyOffset += st->outSingle.length() + st->outRange.length()*2; + redFsm->maxIndexOffset += st->outSingle.length() + st->outRange.length() + 1; + } + + /* Max cond span. */ + if ( st->condList != 0 ) { + unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey ); + if ( span > redFsm->maxCondSpan ) + redFsm->maxCondSpan = span; + } + + /* Max key span. */ + if ( st->transList != 0 ) { + unsigned long long span = keyOps->span( st->lowKey, st->highKey ); + if ( span > redFsm->maxSpan ) + redFsm->maxSpan = span; + } + + /* Max cond index offset. */ + if ( ! st.last() ) { + if ( st->condList != 0 ) + redFsm->maxCondIndexOffset += keyOps->span( st->condLowKey, st->condHighKey ); + } + + /* Max flat index offset. */ + if ( ! 
st.last() ) { + if ( st->transList != 0 ) + redFsm->maxFlatIndexOffset += keyOps->span( st->lowKey, st->highKey ); + redFsm->maxFlatIndexOffset += 1; + } + } + + for ( ActionTableMap::Iter at = redFsm->actionMap; at.lte(); at++ ) { + /* Maximum id of action lists. */ + if ( at->actListId+1 > redFsm->maxActListId ) + redFsm->maxActListId = at->actListId+1; + + /* Maximum location of items in action array. */ + if ( at->location+1 > redFsm->maxActionLoc ) + redFsm->maxActionLoc = at->location+1; + + /* Maximum values going into the action array. */ + if ( at->key.length() > redFsm->maxActArrItem ) + redFsm->maxActArrItem = at->key.length(); + for ( ActionTable::Iter item = at->key; item.lte(); item++ ) { + if ( item->value->actionId > redFsm->maxActArrItem ) + redFsm->maxActArrItem = item->value->actionId; + } + } +} + + + +/* Gather various info on the machine. */ +void CodeGenData::analyzeMachine() +{ + /* Find the true count of action references. */ + findFinalActionRefs(); + + /* Check if there are any calls in action code. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Record the occurrence of various kinds of actions. */ + if ( act->numToStateRefs > 0 ) + redFsm->bAnyToStateActions = true; + if ( act->numFromStateRefs > 0 ) + redFsm->bAnyFromStateActions = true; + if ( act->numEofRefs > 0 ) + redFsm->bAnyEofActions = true; + if ( act->numTransRefs > 0 ) + redFsm->bAnyRegActions = true; + + /* Recurse through the action's parse tree looking for various things. */ + analyzeAction( act, act->inlineList ); + } + + /* Analyze reduced action lists. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + for ( ActionTable::Iter act = redAct->key; act.lte(); act++ ) + analyzeActionList( redAct, act->value->inlineList ); + } + + /* Find states that have transitions with actions that have next + * statements. 
*/ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Check any actions out of outSinge. */ + for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + } + + /* Check any actions out of outRange. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + } + + /* Check any action out of default. */ + if ( st->defTrans != 0 && st->defTrans->action != 0 && + st->defTrans->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + + if ( st->stateCondList.length() > 0 ) + redFsm->bAnyConditions = true; + } + + /* Assign ids to actions that are referenced. */ + assignActionIds(); + + /* Set the maximums of various values used for deciding types. */ + setValueLimits(); +} + +void CodeGenData::writeStatement( InputLoc &loc, int nargs, char **args ) +{ + /* FIXME: This should be moved to the virtual functions in the code + * generators. + * + * Force a newline. 
*/ + out << "\n"; + genLineDirective( out ); + + if ( strcmp( args[0], "data" ) == 0 ) { + for ( int i = 1; i < nargs; i++ ) { + if ( strcmp( args[i], "noerror" ) == 0 ) + writeErr = false; + else if ( strcmp( args[i], "noprefix" ) == 0 ) + dataPrefix = false; + else if ( strcmp( args[i], "nofinal" ) == 0 ) + writeFirstFinal = false; + else { + source_warning(loc) << "unrecognized write option \"" << + args[i] << "\"" << endl; + } + } + writeData(); + } + else if ( strcmp( args[0], "init" ) == 0 ) { + for ( int i = 1; i < nargs; i++ ) { + source_warning(loc) << "unrecognized write option \"" << + args[i] << "\"" << endl; + } + writeInit(); + } + else if ( strcmp( args[0], "exec" ) == 0 ) { + for ( int i = 1; i < nargs; i++ ) { + if ( strcmp( args[i], "noend" ) == 0 ) + hasEnd = false; + else { + source_warning(loc) << "unrecognized write option \"" << + args[i] << "\"" << endl; + } + } + writeExec(); + } + else if ( strcmp( args[0], "eof" ) == 0 ) { + for ( int i = 1; i < nargs; i++ ) { + source_warning(loc) << "unrecognized write option \"" << + args[i] << "\"" << endl; + } + writeEOF(); + } + else if ( strcmp( args[0], "exports" ) == 0 ) { + for ( int i = 1; i < nargs; i++ ) { + source_warning(loc) << "unrecognized write option \"" << + args[i] << "\"" << endl; + } + writeExports(); + } + else { + /* EMIT An error here. 
*/ + source_error(loc) << "unrecognized write command \"" << + args[0] << "\"" << endl; + } +} + +ostream &CodeGenData::source_warning( const InputLoc &loc ) +{ + cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: "; + return cerr; +} + +ostream &CodeGenData::source_error( const InputLoc &loc ) +{ + codeGenErrCount += 1; + assert( sourceFileName != 0 ); + cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": "; + return cerr; +} + + diff --git a/contrib/tools/ragel5/redfsm/gendata.h b/contrib/tools/ragel5/redfsm/gendata.h new file mode 100644 index 0000000000..855e0710a7 --- /dev/null +++ b/contrib/tools/ragel5/redfsm/gendata.h @@ -0,0 +1,167 @@ +/* + * Copyright 2005-2007 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _GENDATA_H +#define _GENDATA_H + +#include <iostream> +#include "redfsm.h" +#include "common.h" + +using std::ostream; + +struct NameInst; +typedef DList<Action> ActionList; + +typedef unsigned long ulong; + +struct FsmCodeGen; +struct CodeGenData; + +typedef AvlMap<char *, CodeGenData*, CmpStr> CodeGenMap; +typedef AvlMapEl<char *, CodeGenData*> CodeGenMapEl; + +/* + * The interface to the parser + */ + +/* These functions must be implemented by the code generation executable. + * The openOutput function is invoked when the root element is opened. The + * makeCodeGen function is invoked when a ragel_def element is opened. */ +std::ostream *openOutput( char *inputFile ); +CodeGenData *makeCodeGen( char *sourceFileName, + char *fsmName, ostream &out, bool wantComplete ); + +void lineDirective( ostream &out, char *fileName, int line ); +void genLineDirective( ostream &out ); + +/*********************************/ + +struct CodeGenData +{ + /* + * The interface to the code generator. + */ + virtual void finishRagelDef() {} + + /* These are invoked by the corresponding write statements. */ + virtual void writeData() {}; + virtual void writeInit() {}; + virtual void writeExec() {}; + virtual void writeEOF() {}; + virtual void writeExports() {}; + + /* This can also be overwridden to modify the processing of write + * statements. */ + virtual void writeStatement( InputLoc &loc, int nargs, char **args ); + + /********************/ + + CodeGenData( ostream &out ); + virtual ~CodeGenData() {} + + /* + * Collecting the machine. 
+ */ + + char *sourceFileName; + char *fsmName; + ostream &out; + RedFsmAp *redFsm; + Action *allActions; + RedAction *allActionTables; + Condition *allConditions; + CondSpace *allCondSpaces; + RedStateAp *allStates; + NameInst **nameIndex; + int startState; + int errState; + ActionList actionList; + ConditionList conditionList; + CondSpaceList condSpaceList; + InlineList *getKeyExpr; + InlineList *accessExpr; + InlineList *curStateExpr; + KeyOps thisKeyOps; + bool wantComplete; + EntryIdVect entryPointIds; + EntryNameVect entryPointNames; + bool hasLongestMatch; + int codeGenErrCount; + ExportList exportList; + + /* Write options. */ + bool hasEnd; + bool dataPrefix; + bool writeFirstFinal; + bool writeErr; + + void createMachine(); + void initActionList( unsigned long length ); + void newAction( int anum, char *name, int line, int col, InlineList *inlineList ); + void initActionTableList( unsigned long length ); + void initStateList( unsigned long length ); + void setStartState( unsigned long startState ); + void setErrorState( unsigned long errState ); + void addEntryPoint( char *name, unsigned long entryState ); + void setId( int snum, int id ); + void setFinal( int snum ); + void initTransList( int snum, unsigned long length ); + void newTrans( int snum, int tnum, Key lowKey, Key highKey, + long targ, long act ); + void finishTransList( int snum ); + void setStateActions( int snum, long toStateAction, + long fromStateAction, long eofAction ); + void setForcedErrorState() + { redFsm->forcedErrorState = true; } + + + void initCondSpaceList( ulong length ); + void condSpaceItem( int cnum, long condActionId ); + void newCondSpace( int cnum, int condSpaceId, Key baseKey ); + + void initStateCondList( int snum, ulong length ); + void addStateCond( int snum, Key lowKey, Key highKey, long condNum ); + + CondSpace *findCondSpace( Key lowKey, Key highKey ); + Condition *findCondition( Key key ); + + bool setAlphType( char *data ); + + void resolveTargetStates( 
InlineList *inlineList ); + Key findMaxKey(); + + /* Gather various info on the machine. */ + void analyzeActionList( RedAction *redAct, InlineList *inlineList ); + void analyzeAction( Action *act, InlineList *inlineList ); + void findFinalActionRefs(); + void analyzeMachine(); + + void closeMachine(); + void setValueLimits(); + void assignActionIds(); + + ostream &source_warning( const InputLoc &loc ); + ostream &source_error( const InputLoc &loc ); +}; + + +#endif /* _GENDATA_H */ diff --git a/contrib/tools/ragel5/redfsm/phash.h b/contrib/tools/ragel5/redfsm/phash.h new file mode 100644 index 0000000000..11ce7502a6 --- /dev/null +++ b/contrib/tools/ragel5/redfsm/phash.h @@ -0,0 +1,10 @@ +#pragma once + +class Perfect_Hash +{ +private: + static inline unsigned int hash (const char *str, unsigned int len); + +public: + static struct XMLTagHashPair *in_word_set (const char *str, unsigned int len); +}; diff --git a/contrib/tools/ragel5/redfsm/redfsm.cpp b/contrib/tools/ragel5/redfsm/redfsm.cpp new file mode 100644 index 0000000000..6a55b22ec7 --- /dev/null +++ b/contrib/tools/ragel5/redfsm/redfsm.cpp @@ -0,0 +1,559 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "redfsm.h" +#include "avlmap.h" +#include <iostream> +#include <sstream> + +using std::ostringstream; + +KeyOps *keyOps = 0; + +string Action::nameOrLoc() +{ + if ( name != 0 ) + return string(name); + else { + ostringstream ret; + ret << loc.line << ":" << loc.col; + return ret.str(); + } +} + +RedFsmAp::RedFsmAp() +: + wantComplete(false), + forcedErrorState(false), + nextActionId(0), + nextTransId(0), + startState(0), + errState(0), + errTrans(0), + firstFinState(0), + numFinStates(0), + bAnyToStateActions(false), + bAnyFromStateActions(false), + bAnyRegActions(false), + bAnyEofActions(false), + bAnyActionGotos(false), + bAnyActionCalls(false), + bAnyActionRets(false), + bAnyRegActionRets(false), + bAnyRegActionByValControl(false), + bAnyRegNextStmt(false), + bAnyRegCurStateRef(false), + bAnyRegBreak(false), + bAnyLmSwitchError(false), + bAnyConditions(false) +{ +} + +/* Does the machine have any actions. */ +bool RedFsmAp::anyActions() +{ + return actionMap.length() > 0; +} + +void RedFsmAp::depthFirstOrdering( RedStateAp *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->onStateList ) + return; + + /* Doing depth first, put state on the list. */ + state->onStateList = true; + stateList.append( state ); + + /* At this point transitions should only be in ranges. */ + assert( state->outSingle.length() == 0 ); + assert( state->defTrans == 0 ); + + /* Recurse on everything ranges. */ + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->targ != 0 ) + depthFirstOrdering( rtel->value->targ ); + } +} + +/* Ordering states by transition connections. */ +void RedFsmAp::depthFirstOrdering() +{ + /* Init on state list flags. 
*/ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->onStateList = false; + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + /* Add back to the state list from the start state and all other entry + * points. */ + if ( startState != 0 ) + depthFirstOrdering( startState ); + for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ ) + depthFirstOrdering( *en ); + if ( forcedErrorState ) + depthFirstOrdering( errState ); + + /* Make sure we put everything back on. */ + assert( stateListLen == stateList.length() ); +} + +/* Assign state ids by appearance in the state list. */ +void RedFsmAp::sequentialStateIds() +{ + /* Table based machines depend on the state numbers starting at zero. */ + nextStateId = 0; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->id = nextStateId++; +} + +/* Stable sort the states by final state status. */ +void RedFsmAp::sortStatesByFinal() +{ + /* Move forward through the list and throw final states onto the end. */ + RedStateAp *state = 0; + RedStateAp *next = stateList.head; + RedStateAp *last = stateList.tail; + while ( state != last ) { + /* Move forward and load up the next. */ + state = next; + next = state->next; + + /* Throw to the end? */ + if ( state->isFinal ) { + stateList.detach( state ); + stateList.append( state ); + } + } +} + +/* Assign state ids by final state state status. */ +void RedFsmAp::sortStateIdsByFinal() +{ + /* Table based machines depend on this starting at zero. */ + nextStateId = 0; + + /* First pass to assign non final ids. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( ! st->isFinal ) + st->id = nextStateId++; + } + + /* Second pass to assign final ids. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->isFinal ) + st->id = nextStateId++; + } +} + +void RedFsmAp::sortByStateId() +{ + /* FIXME: Implement. 
*/ +} + +/* Find the final state with the lowest id. */ +void RedFsmAp::findFirstFinState() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->isFinal && (firstFinState == 0 || st->id < firstFinState->id) ) + firstFinState = st; + } +} + +void RedFsmAp::assignActionLocs() +{ + int nextLocation = 0; + for ( ActionTableMap::Iter act = actionMap; act.lte(); act++ ) { + /* Store the loc, skip over the array and a null terminator. */ + act->location = nextLocation; + nextLocation += act->key.length() + 1; + } +} + +/* Check if we can extend the current range by displacing any ranges + * ahead to the singles. */ +bool RedFsmAp::canExtend( const RedTransList &list, int pos ) +{ + /* Get the transition that we want to extend. */ + RedTransAp *extendTrans = list[pos].value; + + /* Look ahead in the transition list. */ + for ( int next = pos + 1; next < list.length(); pos++, next++ ) { + /* If they are not continuous then cannot extend. */ + Key nextKey = list[next].lowKey; + nextKey.decrement(); + if ( list[pos].highKey != nextKey ) + break; + + /* Check for the extenstion property. */ + if ( extendTrans == list[next].value ) + return true; + + /* If the span of the next element is more than one, then don't keep + * checking, it won't be moved to single. */ + unsigned long long nextSpan = keyOps->span( list[next].lowKey, list[next].highKey ); + if ( nextSpan > 1 ) + break; + } + return false; +} + +/* Move ranges to the singles list. */ +void RedFsmAp::moveTransToSingle( RedStateAp *state ) +{ + RedTransList &range = state->outRange; + RedTransList &single = state->outSingle; + for ( int rpos = 0; rpos < range.length(); ) { + /* Check if this is a range we can extend. */ + if ( canExtend( range, rpos ) ) { + /* Transfer singles over. */ + while ( range[rpos].value != range[rpos+1].value ) { + /* Transfer the range to single. */ + single.append( range[rpos+1] ); + range.remove( rpos+1 ); + } + + /* Extend. 
*/ + range[rpos].highKey = range[rpos+1].highKey; + range.remove( rpos+1 ); + } + /* Maybe move it to the singles. */ + else if ( keyOps->span( range[rpos].lowKey, range[rpos].highKey ) == 1 ) { + single.append( range[rpos] ); + range.remove( rpos ); + } + else { + /* Keeping it in the ranges. */ + rpos += 1; + } + } +} + +/* Look through ranges and choose suitable single character transitions. */ +void RedFsmAp::chooseSingle() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Rewrite the transition list taking out the suitable single + * transtions. */ + moveTransToSingle( st ); + } +} + +void RedFsmAp::makeFlat() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->stateCondList.length() == 0 ) { + st->condLowKey = 0; + st->condHighKey = 0; + } + else { + st->condLowKey = st->stateCondList.head->lowKey; + st->condHighKey = st->stateCondList.tail->highKey; + + unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey ); + st->condList = new CondSpace*[ span ]; + memset( st->condList, 0, sizeof(CondSpace*)*span ); + + for ( StateCondList::Iter sci = st->stateCondList; sci.lte(); sci++ ) { + unsigned long long base, trSpan; + base = keyOps->span( st->condLowKey, sci->lowKey )-1; + trSpan = keyOps->span( sci->lowKey, sci->highKey ); + for ( unsigned long long pos = 0; pos < trSpan; pos++ ) + st->condList[base+pos] = sci->condSpace; + } + } + + if ( st->outRange.length() == 0 ) { + st->lowKey = st->highKey = 0; + st->transList = 0; + } + else { + st->lowKey = st->outRange[0].lowKey; + st->highKey = st->outRange[st->outRange.length()-1].highKey; + unsigned long long span = keyOps->span( st->lowKey, st->highKey ); + st->transList = new RedTransAp*[ span ]; + memset( st->transList, 0, sizeof(RedTransAp*)*span ); + + for ( RedTransList::Iter trans = st->outRange; trans.lte(); trans++ ) { + unsigned long long base, trSpan; + base = keyOps->span( st->lowKey, trans->lowKey )-1; + trSpan = 
keyOps->span( trans->lowKey, trans->highKey ); + for ( unsigned long long pos = 0; pos < trSpan; pos++ ) + st->transList[base+pos] = trans->value; + } + + /* Fill in the gaps with the default transition. */ + for ( unsigned long long pos = 0; pos < span; pos++ ) { + if ( st->transList[pos] == 0 ) + st->transList[pos] = st->defTrans; + } + } + } +} + + +/* A default transition has been picked, move it from the outRange to the + * default pointer. */ +void RedFsmAp::moveToDefault( RedTransAp *defTrans, RedStateAp *state ) +{ + /* Rewrite the outRange, omitting any ranges that use + * the picked default. */ + RedTransList outRange; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* If it does not take the default, copy it over. */ + if ( rtel->value != defTrans ) + outRange.append( *rtel ); + } + + /* Save off the range we just created into the state's range. */ + state->outRange.transfer( outRange ); + + /* Store the default. */ + state->defTrans = defTrans; +} + +bool RedFsmAp::alphabetCovered( RedTransList &outRange ) +{ + /* Cannot cover without any out ranges. */ + if ( outRange.length() == 0 ) + return false; + + /* If the first range doesn't start at the the lower bound then the + * alphabet is not covered. */ + RedTransList::Iter rtel = outRange; + if ( keyOps->minKey < rtel->lowKey ) + return false; + + /* Check that every range is next to the previous one. */ + rtel.increment(); + for ( ; rtel.lte(); rtel++ ) { + Key highKey = rtel[-1].highKey; + highKey.increment(); + if ( highKey != rtel->lowKey ) + return false; + } + + /* The last must extend to the upper bound. */ + RedTransEl *last = &outRange[outRange.length()-1]; + if ( last->highKey < keyOps->maxKey ) + return false; + + return true; +} + +RedTransAp *RedFsmAp::chooseDefaultSpan( RedStateAp *state ) +{ + /* Make a set of transitions from the outRange. 
*/ + RedTransSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) + stateTransSet.insert( rtel->value ); + + /* For each transition in the find how many alphabet characters the + * transition spans. */ + unsigned long long *span = new unsigned long long[stateTransSet.length()]; + memset( span, 0, sizeof(unsigned long long) * stateTransSet.length() ); + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* Lookup the transition in the set. */ + RedTransAp **inSet = stateTransSet.find( rtel->value ); + int pos = inSet - stateTransSet.data; + span[pos] += keyOps->span( rtel->lowKey, rtel->highKey ); + } + + /* Find the max span, choose it for making the default. */ + RedTransAp *maxTrans = 0; + unsigned long long maxSpan = 0; + for ( RedTransSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { + if ( span[rtel.pos()] > maxSpan ) { + maxSpan = span[rtel.pos()]; + maxTrans = *rtel; + } + } + + delete[] span; + return maxTrans; +} + +/* Pick default transitions from ranges for the states. */ +void RedFsmAp::chooseDefaultSpan() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Only pick a default transition if the alphabet is covered. This + * avoids any transitions in the out range that go to error and avoids + * the need for an ERR state. */ + if ( alphabetCovered( st->outRange ) ) { + /* Pick a default transition by largest span. */ + RedTransAp *defTrans = chooseDefaultSpan( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } + } +} + +RedTransAp *RedFsmAp::chooseDefaultGoto( RedStateAp *state ) +{ + /* Make a set of transitions from the outRange. 
*/ + RedTransSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->targ == state->next ) + return rtel->value; + } + return 0; +} + +void RedFsmAp::chooseDefaultGoto() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Pick a default transition. */ + RedTransAp *defTrans = chooseDefaultGoto( st ); + if ( defTrans == 0 ) + defTrans = chooseDefaultSpan( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } +} + +RedTransAp *RedFsmAp::chooseDefaultNumRanges( RedStateAp *state ) +{ + /* Make a set of transitions from the outRange. */ + RedTransSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) + stateTransSet.insert( rtel->value ); + + /* For each transition in the find how many ranges use the transition. */ + int *numRanges = new int[stateTransSet.length()]; + memset( numRanges, 0, sizeof(int) * stateTransSet.length() ); + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* Lookup the transition in the set. */ + RedTransAp **inSet = stateTransSet.find( rtel->value ); + numRanges[inSet - stateTransSet.data] += 1; + } + + /* Find the max number of ranges. */ + RedTransAp *maxTrans = 0; + int maxNumRanges = 0; + for ( RedTransSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { + if ( numRanges[rtel.pos()] > maxNumRanges ) { + maxNumRanges = numRanges[rtel.pos()]; + maxTrans = *rtel; + } + } + + delete[] numRanges; + return maxTrans; +} + +void RedFsmAp::chooseDefaultNumRanges() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Pick a default transition. */ + RedTransAp *defTrans = chooseDefaultNumRanges( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. 
*/ + moveToDefault( defTrans, st ); + } +} + +RedTransAp *RedFsmAp::getErrorTrans( ) +{ + /* If the error trans has not been made aready, make it. */ + if ( errTrans == 0 ) { + /* This insert should always succeed since no transition created by + * the user can point to the error state. */ + errTrans = new RedTransAp( getErrorState(), 0, nextTransId++ ); + RedTransAp *inRes = transSet.insert( errTrans ); + assert( inRes != 0 ); + } + return errTrans; +} + +RedStateAp *RedFsmAp::getErrorState() +{ + /* Something went wrong. An error state is needed but one was not supplied + * by the frontend. */ + assert( errState != 0 ); + return errState; +} + + +RedTransAp *RedFsmAp::allocateTrans( RedStateAp *targ, RedAction *action ) +{ + /* Create a reduced trans and look for it in the transiton set. */ + RedTransAp redTrans( targ, action, 0 ); + RedTransAp *inDict = transSet.find( &redTrans ); + if ( inDict == 0 ) { + inDict = new RedTransAp( targ, action, nextTransId++ ); + transSet.insert( inDict ); + } + return inDict; +} + +void RedFsmAp::partitionFsm( int nparts ) +{ + /* At this point the states are ordered by a depth-first traversal. We + * will allocate to partitions based on this ordering. */ + this->nParts = nparts; + int partSize = stateList.length() / nparts; + int remainder = stateList.length() % nparts; + int numInPart = partSize; + int partition = 0; + if ( remainder-- > 0 ) + numInPart += 1; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->partition = partition; + + numInPart -= 1; + if ( numInPart == 0 ) { + partition += 1; + numInPart = partSize; + if ( remainder-- > 0 ) + numInPart += 1; + } + } +} + +void RedFsmAp::setInTrans() +{ + /* First pass counts the number of transitions. */ + for ( TransApSet::Iter trans = transSet; trans.lte(); trans++ ) + trans->targ->numInTrans += 1; + + /* Pass over states to allocate the needed memory. Reset the counts so we + * can use them as the current size. 
*/ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->inTrans = new RedTransAp*[st->numInTrans]; + st->numInTrans = 0; + } + + /* Second pass over transitions copies pointers into the in trans list. */ + for ( TransApSet::Iter trans = transSet; trans.lte(); trans++ ) + trans->targ->inTrans[trans->targ->numInTrans++] = trans; +} diff --git a/contrib/tools/ragel5/redfsm/redfsm.h b/contrib/tools/ragel5/redfsm/redfsm.h new file mode 100644 index 0000000000..515b1b621b --- /dev/null +++ b/contrib/tools/ragel5/redfsm/redfsm.h @@ -0,0 +1,534 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _REDFSM_H +#define _REDFSM_H + +#include <assert.h> +#include <string.h> +#include <string> +#include "common.h" +#include "vector.h" +#include "dlist.h" +#include "compare.h" +#include "bstmap.h" +#include "bstset.h" +#include "avlmap.h" +#include "avltree.h" +#include "avlbasic.h" +#include "mergesort.h" +#include "sbstmap.h" +#include "sbstset.h" +#include "sbsttable.h" + +#define TRANS_ERR_TRANS 0 +#define STATE_ERR_STATE 0 +#define FUNC_NO_FUNC 0 + +using std::string; + +struct RedStateAp; +struct InlineList; +struct Action; + +/* Location in an input file. 
*/ +struct InputLoc +{ + int line; + int col; +}; + +/* + * Inline code tree + */ +struct InlineItem +{ + enum Type + { + Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, + PChar, Char, Hold, Exec, HoldTE, ExecTE, Curs, Targs, Entry, + LmSwitch, LmSetActId, LmSetTokEnd, LmGetTokEnd, LmInitTokStart, + LmInitAct, LmSetTokStart, SubAction, Break + }; + + InlineItem( const InputLoc &loc, Type type ) : + loc(loc), data(0), targId(0), targState(0), + lmId(0), children(0), offset(0), + handlesError(false), type(type) { } + + InputLoc loc; + char *data; + int targId; + RedStateAp *targState; + int lmId; + InlineList *children; + int offset; + bool handlesError; + Type type; + + InlineItem *prev, *next; +}; + +/* Normally this would be atypedef, but that would entail including DList from + * ptreetypes, which should be just typedef forwards. */ +struct InlineList : public DList<InlineItem> { }; + +/* Element in list of actions. Contains the string for the code to exectute. */ +struct Action +: + public DListEl<Action> +{ + Action( ) + : + name(0), + inlineList(0), + actionId(0), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0) + { + } + + /* Data collected during parse. */ + InputLoc loc; + char *name; + InlineList *inlineList; + int actionId; + + string nameOrLoc(); + + /* Number of references in the final machine. */ + int numRefs() + { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; +}; + + +/* Forwards. */ +struct RedStateAp; +struct StateAp; + +/* Transistion Action Element. */ +typedef SBstMapEl< int, Action* > ActionTableEl; + +/* Transition Action Table. 
*/ +struct ActionTable + : public SBstMap< int, Action*, CmpOrd<int> > +{ + void setAction( int ordering, Action *action ); + void setActions( int *orderings, Action **actions, int nActs ); + void setActions( const ActionTable &other ); +}; + +/* Compare of a whole action table element (key & value). */ +struct CmpActionTableEl +{ + static int compare( const ActionTableEl &action1, + const ActionTableEl &action2 ) + { + if ( action1.key < action2.key ) + return -1; + else if ( action1.key > action2.key ) + return 1; + else if ( action1.value < action2.value ) + return -1; + else if ( action1.value > action2.value ) + return 1; + return 0; + } +}; + +/* Compare for ActionTable. */ +typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable; + +/* Set of states. */ +typedef BstSet<RedStateAp*> RedStateSet; +typedef BstSet<int> IntSet; + +/* Reduced action. */ +struct RedAction +: + public AvlTreeEl<RedAction> +{ + RedAction( ) + : + key(), + eofRefs(0), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + bAnyNextStmt(false), + bAnyCurStateRef(false), + bAnyBreakStmt(false) + { } + + const ActionTable &getKey() + { return key; } + + ActionTable key; + int actListId; + int location; + IntSet *eofRefs; + + /* Number of references in the final machine. */ + int numRefs() + { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + + bool anyNextStmt() { return bAnyNextStmt; } + bool anyCurStateRef() { return bAnyCurStateRef; } + bool anyBreakStmt() { return bAnyBreakStmt; } + + bool bAnyNextStmt; + bool bAnyCurStateRef; + bool bAnyBreakStmt; +}; +typedef AvlTree<RedAction, ActionTable, CmpActionTable> ActionTableMap; + +/* Reduced transition. 
*/ +struct RedTransAp +: + public AvlTreeEl<RedTransAp> +{ + RedTransAp( RedStateAp *targ, RedAction *action, int id ) + : targ(targ), action(action), id(id), labelNeeded(true) { } + + RedStateAp *targ; + RedAction *action; + int id; + bool partitionBoundary; + bool labelNeeded; +}; + +/* Compare of transitions for the final reduction of transitions. Comparison + * is on target and the pointer to the shared action table. It is assumed that + * when this is used the action tables have been reduced. */ +struct CmpRedTransAp +{ + static int compare( const RedTransAp &t1, const RedTransAp &t2 ) + { + if ( t1.targ < t2.targ ) + return -1; + else if ( t1.targ > t2.targ ) + return 1; + else if ( t1.action < t2.action ) + return -1; + else if ( t1.action > t2.action ) + return 1; + else + return 0; + } +}; + +typedef AvlBasic<RedTransAp, CmpRedTransAp> TransApSet; + +/* Element in out range. */ +struct RedTransEl +{ + /* Constructors. */ + RedTransEl( Key lowKey, Key highKey, RedTransAp *value ) + : lowKey(lowKey), highKey(highKey), value(value) { } + + Key lowKey, highKey; + RedTransAp *value; +}; + +typedef Vector<RedTransEl> RedTransList; +typedef Vector<RedStateAp*> RedStateVect; + +typedef BstMapEl<RedStateAp*, unsigned long long> RedSpanMapEl; +typedef BstMap<RedStateAp*, unsigned long long> RedSpanMap; + +/* Compare used by span map sort. Reverse sorts by the span. */ +struct CmpRedSpanMapEl +{ + static int compare( const RedSpanMapEl &smel1, const RedSpanMapEl &smel2 ) + { + if ( smel1.value > smel2.value ) + return -1; + else if ( smel1.value < smel2.value ) + return 1; + else + return 0; + } +}; + +/* Sorting state-span map entries by span. */ +typedef MergeSort<RedSpanMapEl, CmpRedSpanMapEl> RedSpanMapSort; + +/* Set of entry ids that go into this state. 
*/ +typedef Vector<int> EntryIdVect; +typedef Vector<char*> EntryNameVect; + +typedef Vector< Action* > CondSet; + +struct Condition +{ + Condition( ) + : key(0), baseKey(0) {} + + Key key; + Key baseKey; + CondSet condSet; + + Condition *next, *prev; +}; +typedef DList<Condition> ConditionList; + +struct CondSpace +{ + Key baseKey; + CondSet condSet; + int condSpaceId; + + CondSpace *next, *prev; +}; +typedef DList<CondSpace> CondSpaceList; + +struct StateCond +{ + Key lowKey; + Key highKey; + + CondSpace *condSpace; + + StateCond *prev, *next; +}; +typedef DList<StateCond> StateCondList; +typedef Vector<StateCond*> StateCondVect; + +/* Reduced state. */ +struct RedStateAp +{ + RedStateAp() + : + defTrans(0), + condList(0), + transList(0), + isFinal(false), + labelNeeded(false), + outNeeded(false), + onStateList(false), + toStateAction(0), + fromStateAction(0), + eofAction(0), + id(0), + bAnyRegCurStateRef(false), + partitionBoundary(false), + inTrans(0), + numInTrans(0) + { } + + /* Transitions out. */ + RedTransList outSingle; + RedTransList outRange; + RedTransAp *defTrans; + + /* For flat conditions. */ + Key condLowKey, condHighKey; + CondSpace **condList; + + /* For flat keys. */ + Key lowKey, highKey; + RedTransAp **transList; + + /* The list of states that transitions from this state go to. */ + RedStateVect targStates; + + bool isFinal; + bool labelNeeded; + bool outNeeded; + bool onStateList; + RedAction *toStateAction; + RedAction *fromStateAction; + RedAction *eofAction; + int id; + StateCondList stateCondList; + StateCondVect stateCondVect; + + /* Pointers for the list of states. */ + RedStateAp *prev, *next; + + bool anyRegCurStateRef() { return bAnyRegCurStateRef; } + bool bAnyRegCurStateRef; + + int partition; + bool partitionBoundary; + + RedTransAp **inTrans; + int numInTrans; +}; + +/* List of states. */ +typedef DList<RedStateAp> RedStateList; + +/* Set of reduced transitons. Comparison is by pointer. 
*/ +typedef BstSet< RedTransAp*, CmpOrd<RedTransAp*> > RedTransSet; + +/* Next version of the fsm machine. */ +struct RedFsmAp +{ + RedFsmAp(); + + bool wantComplete; + bool forcedErrorState; + + int nextActionId; + int nextTransId; + + /* Next State Id doubles as the total number of state ids. */ + int nextStateId; + + TransApSet transSet; + ActionTableMap actionMap; + RedStateList stateList; + RedStateSet entryPoints; + RedStateAp *startState; + RedStateAp *errState; + RedTransAp *errTrans; + RedTransAp *errActionTrans; + RedStateAp *firstFinState; + int numFinStates; + int nParts; + + bool bAnyToStateActions; + bool bAnyFromStateActions; + bool bAnyRegActions; + bool bAnyEofActions; + bool bAnyActionGotos; + bool bAnyActionCalls; + bool bAnyActionRets; + bool bAnyRegActionRets; + bool bAnyRegActionByValControl; + bool bAnyRegNextStmt; + bool bAnyRegCurStateRef; + bool bAnyRegBreak; + bool bAnyLmSwitchError; + bool bAnyConditions; + + int maxState; + int maxSingleLen; + int maxRangeLen; + int maxKeyOffset; + int maxIndexOffset; + int maxIndex; + int maxActListId; + int maxActionLoc; + int maxActArrItem; + unsigned long long maxSpan; + unsigned long long maxCondSpan; + int maxFlatIndexOffset; + Key maxKey; + int maxCondOffset; + int maxCondLen; + int maxCondSpaceId; + int maxCondIndexOffset; + int maxCond; + + bool anyActions(); + bool anyToStateActions() { return bAnyToStateActions; } + bool anyFromStateActions() { return bAnyFromStateActions; } + bool anyRegActions() { return bAnyRegActions; } + bool anyEofActions() { return bAnyEofActions; } + bool anyActionGotos() { return bAnyActionGotos; } + bool anyActionCalls() { return bAnyActionCalls; } + bool anyActionRets() { return bAnyActionRets; } + bool anyRegActionRets() { return bAnyRegActionRets; } + bool anyRegActionByValControl() { return bAnyRegActionByValControl; } + bool anyRegNextStmt() { return bAnyRegNextStmt; } + bool anyRegCurStateRef() { return bAnyRegCurStateRef; } + bool anyRegBreak() { return 
bAnyRegBreak; } + bool anyLmSwitchError() { return bAnyLmSwitchError; } + bool anyConditions() { return bAnyConditions; } + + + /* Is is it possible to extend a range by bumping ranges that span only + * one character to the singles array. */ + bool canExtend( const RedTransList &list, int pos ); + + /* Pick single transitions from the ranges. */ + void moveTransToSingle( RedStateAp *state ); + void chooseSingle(); + + void makeFlat(); + + /* Move a selected transition from ranges to default. */ + void moveToDefault( RedTransAp *defTrans, RedStateAp *state ); + + /* Pick a default transition by largest span. */ + RedTransAp *chooseDefaultSpan( RedStateAp *state ); + void chooseDefaultSpan(); + + /* Pick a default transition by most number of ranges. */ + RedTransAp *chooseDefaultNumRanges( RedStateAp *state ); + void chooseDefaultNumRanges(); + + /* Pick a default transition tailored towards goto driven machine. */ + RedTransAp *chooseDefaultGoto( RedStateAp *state ); + void chooseDefaultGoto(); + + /* Ordering states by transition connections. */ + void optimizeStateOrdering( RedStateAp *state ); + void optimizeStateOrdering(); + + /* Ordering states by transition connections. */ + void depthFirstOrdering( RedStateAp *state ); + void depthFirstOrdering(); + + /* Set state ids. */ + void sequentialStateIds(); + void sortStateIdsByFinal(); + + /* Arrange states in by final id. This is a stable sort. */ + void sortStatesByFinal(); + + /* Sorting states by id. */ + void sortByStateId(); + + /* Locating the first final state. This is the final state with the lowest + * id. */ + void findFirstFinState(); + + void assignActionLocs(); + + RedTransAp *getErrorTrans(); + RedStateAp *getErrorState(); + + /* Is every char in the alphabet covered? 
*/ + bool alphabetCovered( RedTransList &outRange ); + + RedTransAp *allocateTrans( RedStateAp *targState, RedAction *actionTable ); + + void partitionFsm( int nParts ); + + void setInTrans(); +}; + + +#endif /* _REDFSM_H */ diff --git a/contrib/tools/ragel5/redfsm/xmlparse.cpp b/contrib/tools/ragel5/redfsm/xmlparse.cpp new file mode 100644 index 0000000000..6da8c50e91 --- /dev/null +++ b/contrib/tools/ragel5/redfsm/xmlparse.cpp @@ -0,0 +1,3549 @@ +/* Automatically generated by Kelbt from "xmlparse.kl". + * + * Parts of this file are copied from Kelbt source covered by the GNU + * GPL. As a special exception, you may use the parts of this file copied + * from Kelbt source without restriction. The remainder is derived from + * "xmlparse.kl" and inherits the copyright status of that file. + */ + +#line 1 "xmlparse.kl" +/* + * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "xmlparse.h" +#include "common.h" +#include "gendata.h" +#include <iostream> + +#include <stdlib.h> +//#include <malloc.h> + +using std::cout; +using std::ostream; +using std::istream; +using std::cerr; +using std::endl; + +Key readKey( char *td, char **end ); +long readOffsetPtr( char *td, char **end ); +unsigned long readLength( char *td ); + +#line 117 "xmlparse.kh" +#line 120 "xmlparse.kh" +#line 163 "xmlparse.kh" +#line 846 "xmlparse.kl" + + +#line 54 "xmlparse.cpp" +struct Parser_Lel_inline_item_type +{ +#line 499 "xmlparse.kl" + + InlineItem *inlineItem; + + +#line 61 "xmlparse.cpp" +}; + +struct Parser_Lel_inline_list +{ +#line 480 "xmlparse.kl" + + InlineList *inlineList; + + +#line 71 "xmlparse.cpp" +}; + +struct Parser_Lel_lm_action_list +{ +#line 716 "xmlparse.kl" + + InlineList *inlineList; + + +#line 81 "xmlparse.cpp" +}; + +struct Parser_Lel_tag_arg +{ +#line 256 "xmlparse.kl" + + char *option; + + +#line 91 "xmlparse.cpp" +}; + +struct Parser_Lel_tag_write_head +{ +#line 220 "xmlparse.kl" + + InputLoc loc; + + +#line 101 "xmlparse.cpp" +}; + +union Parser_UserData +{ + struct Parser_Lel_inline_item_type inline_item_type; + struct Parser_Lel_inline_list inline_list; + struct Parser_Lel_lm_action_list lm_action_list; + struct Parser_Lel_tag_arg tag_arg; + struct Parser_Lel_tag_write_head tag_write_head; + struct Token token; +}; + +struct Parser_LangEl +{ + char *file; + int line; + int type; + int reduction; + int state; + union Parser_UserData user; + unsigned int retry; + struct Parser_LangEl *next, *child; +}; + +#line 127 "xmlparse.cpp" +unsigned int Parser_startState = 0; + +short Parser_indicies[] = { + 142, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 
-1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 140, 139, 0, 1, 283, 144, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 144, 144, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 144, -1, -1, -1, -1, -1, + -1, -1, -1, 2, 146, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 151, + 146, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 146, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 3, 143, -1, -1, -1, + 4, 5, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 6, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 169, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 
145, 147, 148, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 7, 153, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 153, -1, -1, -1, -1, + -1, -1, 153, -1, 153, -1, -1, -1, + -1, -1, -1, -1, 153, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 153, 153, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 8, + 141, 9, 171, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 171, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 10, 11, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 174, -1, -1, + -1, -1, -1, -1, 12, -1, 13, -1, + -1, -1, -1, -1, -1, -1, 16, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 15, 14, -1, -1, 
-1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 152, 154, 155, 156, 157, 158, + 159, -1, -1, -1, -1, -1, -1, 17, + 149, 18, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 19, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 170, 150, 20, 217, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 217, -1, -1, -1, -1, + -1, -1, 217, 217, 217, 217, 217, 217, + 217, 217, 217, 217, 217, 217, 217, 217, + 217, 217, 217, -1, 217, 217, 217, 217, + 217, 217, 217, -1, -1, -1, 217, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 21, 217, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 217, -1, -1, -1, -1, + -1, -1, 217, 
217, 217, 217, 217, 217, + 217, 217, 217, 217, 217, 217, 217, 217, + 217, 217, 217, -1, 217, 217, 217, 217, + 217, 217, 217, -1, -1, -1, 217, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 24, 217, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 217, -1, -1, -1, -1, + -1, -1, 217, 217, 217, 217, 217, 217, + 217, 217, 217, 217, 217, 217, 217, 217, + 217, 217, 217, -1, 217, 217, 217, 217, + 217, 217, 217, -1, -1, -1, 217, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 23, 162, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 162, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 22, 176, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 176, -1, -1, 
-1, -1, 176, 176, + 176, 176, -1, -1, -1, -1, -1, -1, + 176, -1, 176, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 25, 168, 26, 164, 27, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 52, -1, -1, -1, -1, -1, -1, + 28, 29, 30, 31, 32, 33, 34, 35, + 37, 38, 39, 40, 41, 42, 43, 44, + 45, -1, 53, 47, 51, 50, 48, 46, + 49, -1, -1, -1, 36, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 216, -1, 218, 219, + 220, 221, 222, 223, 224, 225, 226, 227, + 228, 229, 230, 231, 232, 233, 234, 235, + 236, 237, 238, 239, 240, 241, 242, 243, + 54, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 55, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 161, 56, + -1, -1, 
-1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 52, -1, -1, -1, + -1, -1, -1, 28, 29, 30, 31, 32, + 33, 34, 35, 37, 38, 39, 40, 41, + 42, 43, 44, 45, -1, 53, 47, 51, + 50, 48, 46, 49, -1, -1, -1, 36, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 216, + -1, 218, 219, 220, 221, 222, 223, 224, + 225, 226, 227, 228, 229, 230, 231, 232, + 233, 234, 235, 236, 237, 238, 239, 240, + 241, 242, 243, 57, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 52, -1, -1, -1, -1, -1, -1, 28, + 29, 30, 31, 32, 33, 34, 35, 37, + 38, 39, 40, 41, 42, 43, 44, 45, + -1, 53, 47, 51, 50, 48, 46, 49, + -1, -1, -1, 36, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 216, -1, 218, 219, 220, + 221, 222, 223, 224, 225, 226, 227, 228, + 229, 230, 231, 232, 233, 234, 235, 236, + 237, 238, 239, 240, 241, 242, 243, 58, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 
-1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 191, -1, -1, -1, + -1, 59, 60, 212, 274, -1, -1, -1, + -1, -1, -1, 61, -1, 279, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 175, 177, 178, 179, + 180, 181, 182, 183, -1, -1, 62, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 63, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 64, -1, -1, + 65, 172, 165, 67, 68, 69, 70, 217, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 217, -1, -1, -1, + -1, -1, -1, 217, 217, 217, 217, 217, + 217, 217, 217, 217, 217, 217, 217, 217, + 217, 217, 217, 217, -1, 217, 217, 217, + 217, 217, 217, 217, -1, -1, -1, 217, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 71, 217, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 
-1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 217, -1, -1, -1, + -1, -1, -1, 217, 217, 217, 217, 217, + 217, 217, 217, 217, 217, 217, 217, 217, + 217, 217, 217, 217, -1, 217, 217, 217, + 217, 217, 217, 217, -1, -1, -1, 217, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 72, 217, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 217, -1, -1, -1, + -1, -1, -1, 217, 217, 217, 217, 217, + 217, 217, 217, 217, 217, 217, 217, 217, + 217, 217, 217, 217, -1, 217, 217, 217, + 217, 217, 217, 217, -1, -1, -1, 217, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 73, 74, + 91, 75, 76, 77, 217, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 217, -1, -1, -1, -1, -1, -1, + 217, 217, 217, 217, 217, 217, 217, 217, + 217, 217, 217, 217, 217, 217, 217, 217, + 217, -1, 217, 217, 217, 217, 217, 217, + 217, -1, -1, -1, 217, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 78, 79, 217, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 
-1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 217, -1, -1, -1, -1, -1, + -1, 217, 217, 217, 217, 217, 217, 217, + 217, 217, 217, 217, 217, 217, 217, 217, + 217, 217, -1, 217, 217, 217, 217, 217, + 217, 217, -1, -1, -1, 217, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 80, 81, 82, 83, + 89, 85, 88, 90, 87, 86, 217, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 217, -1, -1, -1, -1, + -1, -1, 217, 217, 217, 217, 217, 217, + 217, 217, 217, 217, 217, 217, 217, 217, + 217, 217, 217, -1, 217, 217, 217, 217, + 217, 217, 217, -1, -1, -1, 217, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 66, 271, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 271, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 
-1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 84, 160, 92, 167, 166, 173, + 93, 94, 188, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 188, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 95, + 193, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 193, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 96, 214, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 
-1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 214, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 97, + 276, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 276, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 98, + 100, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 99, 281, 
101, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 52, -1, -1, -1, -1, -1, + -1, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 38, 39, 40, 41, 42, 43, + 44, 45, -1, 53, 47, 51, 50, 48, + 46, 49, -1, -1, -1, 36, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 216, -1, 218, + 219, 220, 221, 222, 223, 224, 225, 226, + 227, 228, 229, 230, 231, 232, 233, 234, + 235, 236, 237, 238, 239, 240, 241, 242, + 243, 244, 245, 246, 247, 102, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 52, -1, -1, -1, -1, -1, + -1, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 38, 39, 40, 41, 42, 43, + 44, 45, -1, 53, 47, 51, 50, 48, + 46, 49, -1, -1, -1, 36, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 216, -1, 218, + 219, 220, 221, 222, 223, 224, 225, 226, + 227, 228, 229, 230, 231, 232, 233, 234, + 235, 236, 237, 238, 239, 240, 241, 242, + 243, 103, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 
-1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 52, -1, + -1, -1, -1, -1, -1, 28, 29, 30, + 31, 32, 33, 34, 35, 37, 38, 39, + 40, 41, 42, 43, 44, 45, -1, 53, + 47, 51, 50, 48, 46, 49, -1, -1, + -1, 36, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 216, -1, 218, 219, 220, 221, 222, + 223, 224, 225, 226, 227, 228, 229, 230, + 231, 232, 233, 234, 235, 236, 237, 238, + 239, 240, 241, 242, 243, 104, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 52, -1, -1, -1, -1, -1, + -1, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 38, 39, 40, 41, 42, 43, + 44, 45, -1, 53, 47, 51, 50, 48, + 46, 49, -1, -1, -1, 36, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 216, -1, 218, + 219, 220, 221, 222, 223, 224, 225, 226, + 227, 228, 229, 230, 231, 232, 233, 234, + 235, 236, 237, 238, 239, 240, 241, 242, + 243, 251, 253, 254, 255, 105, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 
-1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 52, -1, -1, -1, -1, -1, + -1, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 38, 39, 40, 41, 42, 43, + 44, 45, -1, 53, 47, 51, 50, 48, + 46, 49, -1, -1, -1, 36, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 216, -1, 218, + 219, 220, 221, 222, 223, 224, 225, 226, + 227, 228, 229, 230, 231, 232, 233, 234, + 235, 236, 237, 238, 239, 240, 241, 242, + 243, 257, 106, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 52, + -1, -1, -1, -1, -1, -1, 28, 29, + 30, 31, 32, 33, 34, 35, 37, 38, + 39, 40, 41, 42, 43, 44, 45, -1, + 53, 47, 51, 50, 48, 46, 49, -1, + -1, -1, 36, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 216, -1, 218, 219, 220, 221, + 222, 223, 224, 225, 226, 227, 228, 229, + 230, 231, 232, 233, 234, 235, 236, 237, + 238, 239, 240, 241, 242, 243, 259, 260, + 261, 107, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 
-1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 108, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 270, 263, + 267, 266, 264, 262, 265, 252, 163, 184, + 185, 109, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 110, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 187, + 111, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 112, -1, + -1, -1, -1, -1, -1, -1, -1, 
-1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 192, 113, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 114, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 213, 115, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 116, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 
-1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 275, 118, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 100, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 280, 117, + 268, 248, 249, 250, 256, 258, 269, 217, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 217, -1, -1, -1, + -1, -1, -1, 217, 217, 217, 217, 217, + 217, 217, 217, 217, 217, 217, 217, 217, + 217, 217, 217, 217, -1, 217, 217, 217, + 217, 217, 217, 217, -1, -1, -1, 217, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 119, 186, + 120, 190, 196, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 
-1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, 196, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 196, -1, -1, + -1, -1, 196, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 121, 211, 217, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 217, -1, -1, -1, -1, + -1, -1, 217, 217, 217, 217, 217, 217, + 217, 217, 217, 217, 217, 217, 217, 217, + 217, 217, 217, -1, 217, 217, 217, 217, + 217, 217, 217, -1, -1, -1, 217, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 122, 273, 123, + 282, 278, 124, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 52, + -1, -1, -1, -1, -1, -1, 28, 29, + 30, 31, 32, 33, 34, 35, 37, 38, + 39, 40, 41, 42, 43, 44, 45, -1, + 53, 47, 51, 50, 48, 46, 49, -1, + -1, -1, 36, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 
-1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 216, -1, 218, 219, 220, 221, + 222, 223, 224, 225, 226, 227, 228, 229, + 230, 231, 232, 233, 234, 235, 236, 237, + 238, 239, 240, 241, 242, 243, 189, 125, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 207, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 126, -1, -1, -1, -1, 202, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 195, 197, 198, 199, 127, -1, + -1, 128, 129, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 52, + -1, -1, -1, -1, -1, -1, 28, 29, + 30, 31, 32, 33, 34, 35, 37, 38, + 39, 40, 41, 42, 43, 44, 45, -1, + 53, 47, 51, 50, 48, 46, 49, -1, + -1, -1, 36, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 216, -1, 218, 219, 220, 221, + 222, 
223, 224, 225, 226, 227, 228, 229, + 230, 231, 232, 233, 234, 235, 236, 237, + 238, 239, 240, 241, 242, 243, 277, 272, + 194, 130, 204, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 204, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 131, 209, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 209, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 132, 215, + 200, 133, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 
+ -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 134, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 203, 135, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 136, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 208, + 201, 137, 206, 138, 205, 210, +}; + +unsigned short Parser_keys[] = { + 129, 188, 185, 185, 47, 189, 47, 195, + 47, 207, 47, 196, 129, 129, 47, 47, + 47, 208, 47, 210, 131, 131, 47, 209, + 130, 130, 47, 47, 47, 206, 47, 206, + 47, 206, 47, 204, 47, 211, 180, 180, + 47, 47, 143, 143, 47, 266, 47, 205, + 47, 266, 47, 266, 47, 272, 184, 184, + 145, 145, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 206, 47, 206, 47, 206, + 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 206, 47, 47, 47, 206, + 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 206, 47, 267, 153, 153, + 47, 47, 181, 181, 182, 182, 136, 136, + 47, 47, 47, 47, 47, 220, 47, 223, 
+ 47, 237, 47, 270, 150, 274, 47, 266, + 155, 155, 156, 156, 157, 157, 158, 158, + 47, 266, 47, 266, 47, 266, 162, 162, + 163, 163, 164, 164, 165, 165, 47, 266, + 167, 167, 47, 266, 169, 169, 170, 170, + 171, 171, 47, 268, 174, 174, 175, 175, + 176, 176, 177, 177, 178, 178, 179, 179, + 183, 183, 154, 154, 137, 137, 138, 138, + 47, 221, 47, 224, 47, 238, 47, 271, + 47, 274, 47, 47, 148, 148, 159, 159, + 160, 160, 161, 161, 166, 166, 168, 168, + 173, 173, 47, 206, 147, 147, 47, 47, + 132, 132, 47, 225, 139, 139, 47, 206, + 140, 140, 47, 47, 150, 150, 149, 149, + 47, 266, 171, 171, 47, 233, 47, 266, + 142, 142, 148, 148, 133, 133, 47, 47, + 47, 231, 47, 234, 141, 141, 146, 146, + 47, 232, 47, 235, 151, 151, 47, 47, + 134, 134, 47, 47, 152, 152, 135, 135, + 0, 0 +}; + +unsigned int Parser_offsets[] = { + 0, 60, 61, 204, 353, 514, 664, 665, + 666, 828, 992, 993, 1156, 1157, 1158, 1318, + 1478, 1638, 1796, 1961, 1962, 1963, 1964, 2184, + 2343, 2563, 2783, 3009, 3010, 3011, 3012, 3013, + 3014, 3015, 3175, 3335, 3495, 3496, 3497, 3498, + 3499, 3500, 3660, 3661, 3821, 3822, 3823, 3824, + 3825, 3826, 3827, 3828, 3829, 3830, 3990, 4211, + 4212, 4213, 4214, 4215, 4216, 4217, 4218, 4392, + 4569, 4760, 4984, 5109, 5329, 5330, 5331, 5332, + 5333, 5553, 5773, 5993, 5994, 5995, 5996, 5997, + 6217, 6218, 6438, 6439, 6440, 6441, 6663, 6664, + 6665, 6666, 6667, 6668, 6669, 6670, 6671, 6672, + 6673, 6848, 7026, 7218, 7443, 7671, 7672, 7673, + 7674, 7675, 7676, 7677, 7678, 7679, 7839, 7840, + 7841, 7842, 8021, 8022, 8182, 8183, 8184, 8185, + 8186, 8406, 8407, 8594, 8814, 8815, 8816, 8817, + 8818, 9003, 9191, 9192, 9193, 9379, 9568, 9569, + 9570, 9571, 9572, 9573, 9574 +}; + +unsigned short Parser_targs[] = { + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, + 57, 58, 59, 60, 61, 62, 63, 64, + 65, 66, 67, 68, 
69, 70, 71, 72, + 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, + 89, 90, 91, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, + 105, 106, 107, 108, 109, 110, 111, 112, + 113, 114, 115, 116, 117, 118, 119, 120, + 121, 122, 123, 124, 125, 126, 127, 128, + 129, 130, 131, 132, 133, 134, 135, 136, + 137, 138, 139, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140 +}; + +/* Parser_actInds[pos] is the offset into Parser_actions at which the action words for transition pos begin; parseLangEl computes Parser_actions + Parser_actInds[pos]. */unsigned int Parser_actInds[] = { + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 60, 62, + 64, 66, 68, 70, 72, 74, 76, 78, + 80, 82, 84, 86, 88, 90, 92, 94, + 96, 98, 100, 102, 104, 106, 108, 110, + 112, 114, 116, 118, 120, 122, 124, 126, + 128, 130, 132, 134, 136, 138, 140, 142, + 144, 146, 148, 150, 152, 154, 156, 158, + 160, 162, 164, 166, 168, 170, 172, 174, + 176, 178, 180, 182, 184, 186, 188, 190, + 192, 194, 196, 198, 200, 202, 204, 206, + 208, 210, 212, 214, 216, 218, 220, 222, + 224, 226, 228, 230, 232, 234, 236, 238, + 240, 242, 244, 246, 248, 250, 252, 254, + 256, 258, 260, 262, 264, 266, 268, 270, + 272, 274, 276, 278, 280, 282, 284, 286, + 288, 290, 292, 294, 296, 298, 300, 302, + 304, 306, 308, 310, 312, 314, 316, 318, + 320, 322, 324, 326, 328, 330, 332, 334, + 336, 338, 340, 342, 344,
346, 348, 350, + 352, 354, 356, 358, 360, 362, 364, 366, + 368, 370, 372, 374, 376, 378, 380, 382, + 384, 386, 388, 390, 392, 394, 396, 398, + 400, 402, 404, 406, 408, 410, 412, 414, + 416, 418, 420, 422, 424, 426, 428, 430, + 432, 434, 436, 438, 440, 442, 444, 446, + 448, 450, 452, 454, 456, 458, 460, 462, + 464, 466, 468, 470, 472, 474, 476, 478, + 480, 482, 484, 486, 488, 490, 492, 494, + 496, 498, 500, 502, 504, 506, 508, 510, + 512, 514, 516, 518, 520, 522, 524, 526, + 528, 530, 532, 534, 536, 538, 540, 542, + 544, 546, 548, 550, 552, 554, 556, 558, + 560, 562, 564, 566 +}; + +/* Parser_actions: action words addressed through Parser_actInds. parseLangEl treats a word with bit 0 set as a shift; when the word after it is 0 the low 16 bits of the element retry field are cleared, otherwise retry and numRetry are incremented. NOTE(review): the meaning of the other bits is not visible in this part of the file. */unsigned int Parser_actions[] = { + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 3, 0, + 6, 0, 11, 0, 15, 0, 19, 0, + 22, 0, 27, 0, 30, 0, 35, 0, + 39, 0, 43, 0, 47, 0, 51, 0, + 55, 0, 58, 0, 63, 0, 67, 0, + 71, 0, 75, 0, 79, 0, 83, 0, + 87, 0, 91, 0, 94, 0, 99, 0, + 103, 0, 107, 0, 111, 0, 115, 0, + 119, 0, 123, 0, 127, 0, 130, 0, + 135, 0, 139, 0, 143, 0, 147, 0, + 150, 0, 155, 0, 159, 0, 163, 0, + 167, 0, 171, 0, 175, 0, 179, 0, + 183, 0, 187, 0, 191, 0, 195, 0, + 198, 0, 203, 0, 207, 0, 211, 0, + 215, 0, 218, 0, 223, 0, 227, 0, + 230, 0, 235,
0, 239, 0, 243, 0, + 247, 0, 251, 0, 255, 0, 259, 0, + 262, 0, 267, 0, 271, 0, 275, 0, + 279, 0, 282, 0, 287, 0, 291, 0, + 295, 0, 299, 0, 302, 0, 307, 0, + 311, 0, 314, 0, 319, 0, 323, 0, + 327, 0, 331, 0, 335, 0, 339, 0, + 343, 0, 347, 0, 351, 0, 355, 0, + 359, 0, 363, 0, 367, 0, 371, 0, + 375, 0, 379, 0, 383, 0, 387, 0, + 391, 0, 395, 0, 399, 0, 403, 0, + 407, 0, 411, 0, 415, 0, 419, 0, + 423, 0, 427, 0, 431, 0, 435, 0, + 439, 0, 443, 0, 447, 0, 451, 0, + 455, 0, 459, 0, 463, 0, 467, 0, + 471, 0, 475, 0, 479, 0, 483, 0, + 487, 0, 491, 0, 495, 0, 499, 0, + 503, 0, 507, 0, 511, 0, 515, 0, + 519, 0, 523, 0, 527, 0, 530, 0, + 535, 0, 539, 0, 543, 0, 547, 0, + 550, 0, 555, 0, 559, 0, 563, 0, + 567, 0, 571, 0, 575, 0, 1, 0 +}; + +/* Parser_commitLen[pos]: a nonzero entry makes parseLangEl run a commit over the parse stack after taking transition pos. The absolute value bounds how many top-level stack elements are walked; a negative value starts from the element below stackTop. */int Parser_commitLen[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 2 +}; + +/* Parser_fssProdIdIndex: NOTE(review) no use of this table is visible in this part of the file; the entries on this line are the identity sequence 0, 1, 2, ... */unsigned int Parser_fssProdIdIndex[] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 91, 92, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, + 128, 129, 130, 131, 132, 133, 134, 135, + 136, 137, 138, 139, 140, 141, 142, 143, + 144 +}; + +char Parser_fssProdLengths[] = { + 1, 0, 5, 1, 2, 0, 2, 0, + 1, 1, 3, 4, 1, 2, 0, 1, + 1, 1, 1, 1, 1, 4, 2, 0, + 3, 3, 4, 4, 4, 4, 1, 2, + 0, 3, 4, 1, 2, 0, 1, 1, + 1, 1, 1, 1, 1, 3, 3, 4, + 2, 0, 3, 4, 1, 2, 0, 4, + 2, 0, 1, 1, 1, 3, 4, 1, + 2, 0, 3, 4, 1, 2, 0, 3, + 4, 1, 2, 0, 4, 2, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 3, 3, 3, 3, 4, 4, 4, + 3, 3, 3, 3, 3, 4, 3, 4, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 4, 4, 2, 0, 4, 4, 1, + 2, 0, 3, 4, 1, 2, 1, 3, + 1 +}; + +unsigned short Parser_prodLhsIds[] = { + 187, 187, 186, 188, 189, 189, 190, 190, + 192, 192, 193, 191, 195, 196, 196, 197, + 197, 197, 197, 197, 197, 202, 204, 204, + 205, 198, 199, 200, 201, 194, 207, 208, + 208, 209, 203, 210, 211, 211, 212, 212, + 212, 212, 212, 212, 212, 213, 214, 215, + 220, 220, 221, 216, 222, 223, 223, 224, + 225, 225, 226, 226, 226, 227, 228, 230, + 231, 231, 232, 229, 233, 234, 234, 235, + 217, 236, 237, 237, 238, 206, 206, 239, + 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, + 239, 241, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 254, 255, + 256, 257, 258, 259, 260, 261, 262, 263, + 264, 265, 266, 267, 267, 268, 218, 269, + 270, 270, 271, 219, 272, 273, 273, 274, + 275 +}; + +const char *Parser_prodNames[] = { + "start-1", + "start-2", + "tag_ragel-1", + "tag_ragel_head-1", + "ragel_def_list-1", + "ragel_def_list-2", + "host_or_write_list-1", + "host_or_write_list-2", + "host_or_write-1", + "host_or_write-2", + "tag_host-1", + "ragel_def-1", + 
"tag_ragel_def_head-1", + "ragel_def_item_list-1", + "ragel_def_item_list-2", + "ragel_def_item-1", + "ragel_def_item-2", + "ragel_def_item-3", + "ragel_def_item-4", + "ragel_def_item-5", + "ragel_def_item-6", + "tag_export_list-1", + "export_list-1", + "export_list-2", + "tag_export-1", + "tag_alph_type-1", + "tag_getkey_expr-1", + "tag_access_expr-1", + "tag_curstate_expr-1", + "tag_write-1", + "tag_write_head-1", + "write_option_list-1", + "write_option_list-2", + "tag_arg-1", + "tag_machine-1", + "tag_machine_head-1", + "machine_item_list-1", + "machine_item_list-2", + "machine_item-1", + "machine_item-2", + "machine_item-3", + "machine_item-4", + "machine_item-5", + "machine_item-6", + "machine_item-7", + "tag_start_state-1", + "tag_error_state-1", + "tag_entry_points-1", + "entry_point_list-1", + "entry_point_list-2", + "tag_entry-1", + "tag_state_list-1", + "tag_state_list_head-1", + "state_list-1", + "state_list-2", + "tag_state-1", + "state_item_list-1", + "state_item_list-2", + "state_item-1", + "state_item-2", + "state_item-3", + "tag_state_actions-1", + "tag_state_cond_list-1", + "tag_state_cond_list_head-1", + "state_cond_list-1", + "state_cond_list-2", + "state_cond-1", + "tag_trans_list-1", + "tag_trans_list_head-1", + "trans_list-1", + "trans_list-2", + "tag_trans-1", + "tag_action_list-1", + "tag_action_list_head-1", + "action_list-1", + "action_list-2", + "tag_action-1", + "inline_list-1", + "inline_list-2", + "inline_item-1", + "inline_item-2", + "inline_item-3", + "inline_item-4", + "inline_item-5", + "inline_item-6", + "inline_item-7", + "inline_item-8", + "inline_item-9", + "inline_item-10", + "inline_item-11", + "inline_item-12", + "inline_item-13", + "inline_item-14", + "inline_item-15", + "inline_item-16", + "inline_item-17", + "inline_item-18", + "inline_item-19", + "inline_item-20", + "inline_item-21", + "inline_item-22", + "inline_item-23", + "inline_item-24", + "inline_item-25", + "inline_item-26", + "tag_text-1", + "tag_goto-1", + 
"tag_call-1", + "tag_next-1", + "tag_goto_expr-1", + "tag_call_expr-1", + "tag_next_expr-1", + "tag_ret-1", + "tag_break-1", + "tag_pchar-1", + "tag_char-1", + "tag_hold-1", + "tag_exec-1", + "tag_holdte-1", + "tag_execte-1", + "tag_curs-1", + "tag_targs-1", + "tag_il_entry-1", + "tag_init_tokstart-1", + "tag_init_act-1", + "tag_get_tokend-1", + "tag_set_tokstart-1", + "tag_set_tokend-1", + "tag_set_act-1", + "tag_sub_action-1", + "tag_lm_switch-1", + "lm_action_list-1", + "lm_action_list-2", + "tag_inline_action-1", + "tag_action_table_list-1", + "tag_action_table_list_head-1", + "action_table_list-1", + "action_table_list-2", + "tag_action_table-1", + "tag_cond_space_list-1", + "tag_cond_space_list_head-1", + "cond_space_list-1", + "cond_space_list-2", + "tag_cond_space-1", + "_start-1" +}; + +const char *Parser_lelNames[] = { + "D-0", + "D-1", + "D-2", + "D-3", + "D-4", + "D-5", + "D-6", + "D-7", + "D-8", + "D-9", + "D-10", + "D-11", + "D-12", + "D-13", + "D-14", + "D-15", + "D-16", + "D-17", + "D-18", + "D-19", + "D-20", + "D-21", + "D-22", + "D-23", + "D-24", + "D-25", + "D-26", + "D-27", + "D-28", + "D-29", + "D-30", + "D-31", + "D-32", + "!", + "\"", + "#", + "$", + "%", + "&", + "'", + "(", + ")", + "*", + "+", + ",", + "-", + ".", + "/", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + ":", + ";", + "<", + "=", + ">", + "?", + "@", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "[", + "\\", + "]", + "^", + "_", + "`", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "{", + "|", + "}", + "~", + "D-127", + "TAG_unknown", + "TAG_ragel", + "TAG_ragel_def", + "TAG_host", + "TAG_state_list", + "TAG_state", + "TAG_trans_list", + "TAG_t", + "TAG_machine", + 
"TAG_start_state", + "TAG_error_state", + "TAG_action_list", + "TAG_action_table_list", + "TAG_action", + "TAG_action_table", + "TAG_alphtype", + "TAG_element", + "TAG_getkey", + "TAG_state_actions", + "TAG_entry_points", + "TAG_sub_action", + "TAG_cond_space_list", + "TAG_cond_space", + "TAG_cond_list", + "TAG_c", + "TAG_exports", + "TAG_ex", + "TAG_text", + "TAG_goto", + "TAG_call", + "TAG_next", + "TAG_goto_expr", + "TAG_call_expr", + "TAG_next_expr", + "TAG_ret", + "TAG_pchar", + "TAG_char", + "TAG_hold", + "TAG_exec", + "TAG_holdte", + "TAG_execte", + "TAG_curs", + "TAG_targs", + "TAG_entry", + "TAG_data", + "TAG_lm_switch", + "TAG_init_act", + "TAG_set_act", + "TAG_set_tokend", + "TAG_get_tokend", + "TAG_init_tokstart", + "TAG_set_tokstart", + "TAG_write", + "TAG_curstate", + "TAG_access", + "TAG_break", + "TAG_arg", + "_eof", + "tag_ragel", + "start", + "tag_ragel_head", + "ragel_def_list", + "host_or_write_list", + "ragel_def", + "host_or_write", + "tag_host", + "tag_write", + "tag_ragel_def_head", + "ragel_def_item_list", + "ragel_def_item", + "tag_alph_type", + "tag_getkey_expr", + "tag_access_expr", + "tag_curstate_expr", + "tag_export_list", + "tag_machine", + "export_list", + "tag_export", + "inline_list", + "tag_write_head", + "write_option_list", + "tag_arg", + "tag_machine_head", + "machine_item_list", + "machine_item", + "tag_start_state", + "tag_error_state", + "tag_entry_points", + "tag_state_list", + "tag_action_list", + "tag_action_table_list", + "tag_cond_space_list", + "entry_point_list", + "tag_entry", + "tag_state_list_head", + "state_list", + "tag_state", + "state_item_list", + "state_item", + "tag_state_actions", + "tag_state_cond_list", + "tag_trans_list", + "tag_state_cond_list_head", + "state_cond_list", + "state_cond", + "tag_trans_list_head", + "trans_list", + "tag_trans", + "tag_action_list_head", + "action_list", + "tag_action", + "inline_item", + "inline_item_type", + "tag_text", + "tag_goto", + "tag_call", + "tag_next", + 
"tag_goto_expr", + "tag_call_expr", + "tag_next_expr", + "tag_ret", + "tag_break", + "tag_pchar", + "tag_char", + "tag_hold", + "tag_exec", + "tag_holdte", + "tag_execte", + "tag_curs", + "tag_targs", + "tag_il_entry", + "tag_init_tokstart", + "tag_init_act", + "tag_get_tokend", + "tag_set_tokstart", + "tag_set_tokend", + "tag_set_act", + "tag_sub_action", + "tag_lm_switch", + "lm_action_list", + "tag_inline_action", + "tag_action_table_list_head", + "action_table_list", + "tag_action_table", + "tag_cond_space_list_head", + "cond_space_list", + "tag_cond_space", + "_start" +}; + +#line 851 "xmlparse.kl" + + +void Parser::init() +{ + #line 2079 "xmlparse.cpp" + curs = Parser_startState; + pool = 0; + freshEl = (struct Parser_LangEl*) malloc( sizeof(struct Parser_LangEl)*8128); + #ifdef LOG_ACTIONS + cerr << "allocating 8128 LangEls" << endl; + #endif + stackTop = freshEl; + stackTop->type = 0; + stackTop->state = -1; + stackTop->next = 0; + stackTop->child = 0; + freshPos = 1; + lastFinal = stackTop; + numRetry = 0; + numNodes = 0; + errCount = 0; +#line 856 "xmlparse.kl" +} + +int Parser::parseLangEl( int type, const Token *token ) +{ + #line 2101 "xmlparse.cpp" +#define reject() induceReject = 1 + + int pos, targState; + unsigned int *action; + int rhsLen; + struct Parser_LangEl *rhs[32]; + struct Parser_LangEl *lel; + struct Parser_LangEl *input; + char induceReject; + + if ( curs < 0 ) + return 0; + + if ( pool == 0 ) { + if ( freshPos == 8128 ) { + freshEl = (struct Parser_LangEl*) malloc( + sizeof(struct Parser_LangEl)*8128); + #ifdef LOG_ACTIONS + cerr << "allocating 8128 LangEls" << endl; + #endif + freshPos = 0; + } + input = freshEl + freshPos++; + } + else { + input = pool; + pool = pool->next; + } + numNodes += 1; + input->type = type; + input->user.token = *token; + input->next = 0; + input->retry = 0; + input->child = 0; + +again: + if ( input == 0 ) + goto _out; + + lel = input; + if ( lel->type < Parser_keys[curs<<1] || lel->type > 
Parser_keys[(curs<<1)+1] ) + goto parseError; + + pos = Parser_indicies[Parser_offsets[curs] + (lel->type - Parser_keys[curs<<1])]; + if ( pos < 0 ) + goto parseError; + + induceReject = 0; + targState = Parser_targs[pos]; + action = Parser_actions + Parser_actInds[pos]; + if ( lel->retry & 0x0000ffff ) + action += (lel->retry & 0x0000ffff); + + if ( *action & 0x1 ) { + #ifdef LOG_ACTIONS + cerr << "shifted: " << Parser_lelNames[lel->type]; + #endif + input = input->next; + lel->state = curs; + lel->next = stackTop; + stackTop = lel; + + if ( action[1] == 0 ) + lel->retry &= 0xffff0000; + else { + lel->retry += 1; + numRetry += 1; + #ifdef LOG_ACTIONS + cerr << " retry: " << stackTop; + #endif + } + #ifdef LOG_ACTIONS + cerr << endl; + #endif + } + + if ( Parser_commitLen[pos] != 0 ) { + struct Parser_LangEl *commitHead = stackTop; + int absCommitLen = Parser_commitLen[pos]; + + #ifdef LOG_ACTIONS + cerr << "running commit of length: " << Parser_commitLen[pos] << endl; + #endif + + if ( absCommitLen < 0 ) { + commitHead = commitHead->next; + absCommitLen = -1 * absCommitLen; + } + { + struct Parser_LangEl *lel = commitHead; + struct Parser_LangEl **cmStack = (struct Parser_LangEl**) malloc( sizeof(struct Parser_LangEl) * numNodes); + int n = absCommitLen, depth = 0, sp = 0; + +commit_head: + if ( lel->retry > 0 ) { + if ( lel->retry & 0x0000ffff ) + numRetry -= 1; + if ( lel->retry & 0xffff0000 ) + numRetry -= 1; + lel->retry = 0; + } + + /* If depth is > 0 then move over lel freely, otherwise, make + * sure that we have not already done n steps down the line. */ + if ( lel->next != 0 && ( depth > 0 || n > 1 ) ) { + cmStack[sp++] = lel; + lel = lel->next; + + /* If we are at the top level count the steps down the line. */ + if ( depth == 0 ) + n -= 1; + goto commit_head; + } + +commit_reverse: + if ( lel->child != 0 ) { + cmStack[sp++] = lel; + lel = lel->child; + + /* When we move down we need to increment the depth. 
*/ + depth += 1; + goto commit_head; + } + +commit_upwards: + if ( sp > 0 ) { + /* Figure out which place to return to. */ + if ( cmStack[sp-1]->next == lel ) { + lel = cmStack[--sp]; + goto commit_reverse; + } + else { + /* Going back up, adjust the depth. */ + lel = cmStack[--sp]; + depth -= 1; + goto commit_upwards; + } + } + free( cmStack ); + } + if ( numRetry == 0 ) { + #ifdef LOG_ACTIONS + cerr << "number of retries is zero, " + "executing final actions" << endl; + #endif + { + struct Parser_LangEl *lel = commitHead; + struct Parser_LangEl **cmStack = (struct Parser_LangEl**) malloc( sizeof( struct Parser_LangEl) * numNodes); + int sp = 0; + char doExec = 0; + +final_head: + if ( lel == lastFinal ) { + doExec = 1; + goto hit_final; + } + + if ( lel->next != 0 ) { + cmStack[sp++] = lel; + lel = lel->next; + goto final_head; + } + +final_reverse: + + if ( lel->child != 0 ) { + cmStack[sp++] = lel; + lel = lel->child; + goto final_head; + } + +final_upwards: + + if ( doExec ) { +{ + if ( lel->type < 186 ) { + } + else { + struct Parser_LangEl *redLel = lel; + if ( redLel->child != 0 ) { + int r = Parser_fssProdLengths[redLel->reduction] - 1; + struct Parser_LangEl *rhsEl = redLel->child; + while ( rhsEl != 0 ) { + rhs[r--] = rhsEl; + rhsEl = rhsEl->next; + } + } +switch ( lel->reduction ) { +case 1: { +#line 46 "xmlparse.kl" + + /* If we get no input the assumption is that the frontend died and + * emitted an error. 
*/ + errCount += 1; + + +#line 2297 "xmlparse.cpp" +} break; +case 3: { +#line 55 "xmlparse.kl" + + Attribute *fileNameAttr = (&rhs[0]->user.token)->tag->findAttr( "filename" ); + if ( fileNameAttr == 0 ) { + error((&rhs[0]->user.token)->loc) << "tag <ragel> requires a filename attribute" << endl; + exit(1); + } + else { + sourceFileName = fileNameAttr->value; + + Attribute *langAttr = (&rhs[0]->user.token)->tag->findAttr( "lang" ); + if ( langAttr == 0 ) + error((&rhs[0]->user.token)->loc) << "tag <ragel> requires a lang attribute" << endl; + else { + if ( strcmp( langAttr->value, "C" ) == 0 ) { + hostLangType = CCode; + hostLang = &hostLangC; + } + else if ( strcmp( langAttr->value, "D" ) == 0 ) { + hostLangType = DCode; + hostLang = &hostLangD; + } + else if ( strcmp( langAttr->value, "Java" ) == 0 ) { + hostLangType = JavaCode; + hostLang = &hostLangJava; + } + else if ( strcmp( langAttr->value, "Ruby" ) == 0 ) { + hostLangType = RubyCode; + hostLang = &hostLangRuby; + } + else { + error((&rhs[0]->user.token)->loc) << "expecting lang attribute to be " + "one of C, D, Java or Ruby" << endl; + } + + outStream = openOutput( sourceFileName ); + } + } + + +#line 2340 "xmlparse.cpp" +} break; +case 10: { +#line 105 "xmlparse.kl" + + Attribute *lineAttr = (&rhs[0]->user.token)->tag->findAttr( "line" ); + if ( lineAttr == 0 ) + error((&rhs[0]->user.token)->loc) << "tag <host> requires a line attribute" << endl; + else { + int line = atoi( lineAttr->value ); + if ( outputActive ) + lineDirective( *outStream, sourceFileName, line ); + } + + if ( outputActive ) + *outStream << (&rhs[2]->user.token)->tag->content; + + +#line 2358 "xmlparse.cpp" +} break; +case 11: { +#line 121 "xmlparse.kl" + + /* Do this before distributing transitions out to singles and defaults + * makes life easier. */ + cgd->redFsm->maxKey = cgd->findMaxKey(); + + cgd->redFsm->assignActionLocs(); + + /* Find the first final state (The final state with the lowest id). 
*/ + cgd->redFsm->findFirstFinState(); + + /* Call the user's callback. */ + cgd->finishRagelDef(); + + +#line 2376 "xmlparse.cpp" +} break; +case 12: { +#line 136 "xmlparse.kl" + + char *fsmName = 0; + Attribute *nameAttr = (&rhs[0]->user.token)->tag->findAttr( "name" ); + if ( nameAttr != 0 ) { + fsmName = nameAttr->value; + + CodeGenMapEl *mapEl = codeGenMap.find( fsmName ); + if ( mapEl != 0 ) + cgd = mapEl->value; + else { + cgd = makeCodeGen( sourceFileName, fsmName, *outStream, wantComplete ); + codeGenMap.insert( fsmName, cgd ); + } + } + else { + cgd = makeCodeGen( sourceFileName, fsmName, + *outStream, wantComplete ); + } + + ::keyOps = &cgd->thisKeyOps; + + +#line 2402 "xmlparse.cpp" +} break; +case 24: { +#line 174 "xmlparse.kl" + + Attribute *nameAttr = (&rhs[0]->user.token)->tag->findAttr( "name" ); + if ( nameAttr == 0 ) + error((&rhs[0]->user.token)->loc) << "tag <ex> requires a name attribute" << endl; + else { + char *td = (&rhs[2]->user.token)->tag->content; + Key exportKey = readKey( td, &td ); + cgd->exportList.append( new Export( nameAttr->value, exportKey ) ); + } + + +#line 2417 "xmlparse.cpp" +} break; +case 25: { +#line 186 "xmlparse.kl" + + if ( ! cgd->setAlphType( (&rhs[2]->user.token)->tag->content ) ) + error((&rhs[0]->user.token)->loc) << "tag <alphtype> specifies unknown alphabet type" << endl; + + +#line 2426 "xmlparse.cpp" +} break; +case 26: { +#line 192 "xmlparse.kl" + + cgd->getKeyExpr = (&rhs[1]->user.inline_list)->inlineList; + + +#line 2434 "xmlparse.cpp" +} break; +case 27: { +#line 197 "xmlparse.kl" + + cgd->accessExpr = (&rhs[1]->user.inline_list)->inlineList; + + +#line 2442 "xmlparse.cpp" +} break; +case 28: { +#line 202 "xmlparse.kl" + + cgd->curStateExpr = (&rhs[1]->user.inline_list)->inlineList; + + +#line 2450 "xmlparse.cpp" +} break; +case 29: { +#line 207 "xmlparse.kl" + + /* Terminate the options list and call the write statement handler. 
*/ + writeOptions.append(0); + cgd->writeStatement( (&rhs[0]->user.tag_write_head)->loc, writeOptions.length()-1, writeOptions.data ); + + /* CodeGenData may have issued an error. */ + errCount += cgd->codeGenErrCount; + + /* Clear the options in prep for the next write statement. */ + writeOptions.empty(); + + +#line 2466 "xmlparse.cpp" +} break; +case 30: { +#line 225 "xmlparse.kl" + + Attribute *nameAttr = (&rhs[0]->user.token)->tag->findAttr( "def_name" ); + Attribute *lineAttr = (&rhs[0]->user.token)->tag->findAttr( "line" ); + Attribute *colAttr = (&rhs[0]->user.token)->tag->findAttr( "col" ); + + if ( nameAttr == 0 ) + error((&rhs[0]->user.token)->loc) << "tag <write> requires a def_name attribute" << endl; + if ( lineAttr == 0 ) + error((&rhs[0]->user.token)->loc) << "tag <write> requires a line attribute" << endl; + if ( colAttr == 0 ) + error((&rhs[0]->user.token)->loc) << "tag <write> requires a col attribute" << endl; + + if ( nameAttr != 0 && lineAttr != 0 && colAttr != 0 ) { + CodeGenMapEl *mapEl = codeGenMap.find( nameAttr->value ); + if ( mapEl == 0 ) + error((&rhs[0]->user.token)->loc) << "internal error: cannot find codeGen" << endl; + else { + cgd = mapEl->value; + ::keyOps = &cgd->thisKeyOps; + } + + (&redLel->user.tag_write_head)->loc.line = atoi(lineAttr->value); + (&redLel->user.tag_write_head)->loc.col = atoi(colAttr->value); + } + + +#line 2496 "xmlparse.cpp" +} break; +case 33: { +#line 261 "xmlparse.kl" + + writeOptions.append( (&rhs[2]->user.token)->tag->content ); + + +#line 2504 "xmlparse.cpp" +} break; +case 34: { +#line 266 "xmlparse.kl" + + cgd->closeMachine(); + + +#line 2512 "xmlparse.cpp" +} break; +case 35: { +#line 271 "xmlparse.kl" + + cgd->createMachine(); + + +#line 2520 "xmlparse.cpp" +} break; +case 45: { +#line 291 "xmlparse.kl" + + unsigned long startState = strtoul( (&rhs[2]->user.token)->tag->content, 0, 10 ); + cgd->setStartState( startState ); + + +#line 2529 "xmlparse.cpp" +} break; +case 46: { +#line 297 
"xmlparse.kl" + + unsigned long errorState = strtoul( (&rhs[2]->user.token)->tag->content, 0, 10 ); + cgd->setErrorState( errorState ); + + +#line 2538 "xmlparse.cpp" +} break; +case 47: { +#line 303 "xmlparse.kl" + + Attribute *errorAttr = (&rhs[0]->user.token)->tag->findAttr( "error" ); + if ( errorAttr != 0 ) + cgd->setForcedErrorState(); + + +#line 2548 "xmlparse.cpp" +} break; +case 50: { +#line 313 "xmlparse.kl" + + Attribute *nameAttr = (&rhs[0]->user.token)->tag->findAttr( "name" ); + if ( nameAttr == 0 ) { + error((&rhs[0]->user.token)->loc) << "tag <entry_points>::<entry> " + "requires a name attribute" << endl; + } + else { + char *data = (&rhs[2]->user.token)->tag->content; + unsigned long entry = strtoul( data, &data, 10 ); + cgd->addEntryPoint( nameAttr->value, entry ); + } + + +#line 2565 "xmlparse.cpp" +} break; +case 52: { +#line 329 "xmlparse.kl" + + Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + error((&rhs[0]->user.token)->loc) << "tag <state_list> requires a length attribute" << endl; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + cgd->initStateList( length ); + curState = 0; + } + + +#line 2580 "xmlparse.cpp" +} break; +case 55: { +#line 344 "xmlparse.kl" + + Attribute *idAttr = (&rhs[0]->user.token)->tag->findAttr( "id" ); + if ( idAttr == 0 ) + error((&rhs[0]->user.token)->loc) << "tag <state> requires an id attribute" << endl; + else { + int id = atoi( idAttr->value ); + cgd->setId( curState, id ); + } + + Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "final" ); + if ( lengthAttr != 0 ) + cgd->setFinal( curState ); + curState += 1; + + +#line 2599 "xmlparse.cpp" +} break; +case 61: { +#line 367 "xmlparse.kl" + + char *ad = (&rhs[2]->user.token)->tag->content; + + long toStateAction = readOffsetPtr( ad, &ad ); + long fromStateAction = readOffsetPtr( ad, &ad ); + long eofAction = readOffsetPtr( ad, &ad ); + + cgd->setStateActions( curState, 
toStateAction, + fromStateAction, eofAction ); + + +#line 2614 "xmlparse.cpp" +} break; +case 63: { +#line 381 "xmlparse.kl" + + Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + error((&rhs[0]->user.token)->loc) << "tag <cond_list> requires a length attribute" << endl; + else { + ulong length = readLength( lengthAttr->value ); + cgd->initStateCondList( curState, length ); + curStateCond = 0; + } + + +#line 2629 "xmlparse.cpp" +} break; +case 66: { +#line 396 "xmlparse.kl" + + char *td = (&rhs[2]->user.token)->tag->content; + Key lowKey = readKey( td, &td ); + Key highKey = readKey( td, &td ); + long condId = readOffsetPtr( td, &td ); + cgd->addStateCond( curState, lowKey, highKey, condId ); + + +#line 2641 "xmlparse.cpp" +} break; +case 67: { +#line 405 "xmlparse.kl" + + cgd->finishTransList( curState ); + + +#line 2649 "xmlparse.cpp" +} break; +case 68: { +#line 410 "xmlparse.kl" + + Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + error((&rhs[0]->user.token)->loc) << "tag <trans_list> requires a length attribute" << endl; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + cgd->initTransList( curState, length ); + curTrans = 0; + } + + +#line 2664 "xmlparse.cpp" +} break; +case 71: { +#line 425 "xmlparse.kl" + + char *td = (&rhs[2]->user.token)->tag->content; + Key lowKey = readKey( td, &td ); + Key highKey = readKey( td, &td ); + long targ = readOffsetPtr( td, &td ); + long action = readOffsetPtr( td, &td ); + + cgd->newTrans( curState, curTrans++, lowKey, highKey, targ, action ); + + +#line 2678 "xmlparse.cpp" +} break; +case 73: { +#line 442 "xmlparse.kl" + + Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + error((&rhs[0]->user.token)->loc) << "tag <action_list> requires a length attribute" << endl; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + 
cgd->initActionList( length ); + curAction = 0; + } + + +#line 2693 "xmlparse.cpp" +} break; +case 76: { +#line 461 "xmlparse.kl" + + Attribute *lineAttr = (&rhs[0]->user.token)->tag->findAttr( "line" ); + Attribute *colAttr = (&rhs[0]->user.token)->tag->findAttr( "col" ); + Attribute *nameAttr = (&rhs[0]->user.token)->tag->findAttr( "name" ); + if ( lineAttr == 0 || colAttr == 0) + error((&rhs[0]->user.token)->loc) << "tag <action> requires a line and col attributes" << endl; + else { + unsigned long line = strtoul( lineAttr->value, 0, 10 ); + unsigned long col = strtoul( colAttr->value, 0, 10 ); + + char *name = 0; + if ( nameAttr != 0 ) + name = nameAttr->value; + + cgd->newAction( curAction++, name, line, col, (&rhs[1]->user.inline_list)->inlineList ); + } + + +#line 2715 "xmlparse.cpp" +} break; +case 77: { +#line 486 "xmlparse.kl" + + /* Append the item to the list, return the list. */ + (&rhs[0]->user.inline_list)->inlineList->append( (&rhs[1]->user.inline_item_type)->inlineItem ); + (&redLel->user.inline_list)->inlineList = (&rhs[0]->user.inline_list)->inlineList; + + +#line 2725 "xmlparse.cpp" +} break; +case 78: { +#line 493 "xmlparse.kl" + + /* Start with empty list. 
*/ + (&redLel->user.inline_list)->inlineList = new InlineList; + + +#line 2734 "xmlparse.cpp" +} break; +case 79: { +#line 505 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2740 "xmlparse.cpp" +} break; +case 80: { +#line 506 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2746 "xmlparse.cpp" +} break; +case 81: { +#line 507 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2752 "xmlparse.cpp" +} break; +case 82: { +#line 508 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2758 "xmlparse.cpp" +} break; +case 83: { +#line 509 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2764 "xmlparse.cpp" +} break; +case 84: { +#line 510 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2770 "xmlparse.cpp" +} break; +case 85: { +#line 511 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2776 "xmlparse.cpp" +} break; +case 86: { +#line 512 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2782 "xmlparse.cpp" +} break; +case 87: { +#line 513 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2788 "xmlparse.cpp" +} break; +case 88: { +#line 514 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2794 "xmlparse.cpp" +} break; +case 89: { +#line 515 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2800 "xmlparse.cpp" +} break; +case 90: { 
+#line 516 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2806 "xmlparse.cpp" +} break; +case 91: { +#line 517 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2812 "xmlparse.cpp" +} break; +case 92: { +#line 518 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2818 "xmlparse.cpp" +} break; +case 93: { +#line 519 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2824 "xmlparse.cpp" +} break; +case 94: { +#line 520 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2830 "xmlparse.cpp" +} break; +case 95: { +#line 521 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2836 "xmlparse.cpp" +} break; +case 96: { +#line 522 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2842 "xmlparse.cpp" +} break; +case 97: { +#line 523 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2848 "xmlparse.cpp" +} break; +case 98: { +#line 524 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2854 "xmlparse.cpp" +} break; +case 99: { +#line 525 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2860 "xmlparse.cpp" +} break; +case 100: { +#line 526 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2866 "xmlparse.cpp" +} break; +case 101: { +#line 527 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = 
(&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2872 "xmlparse.cpp" +} break; +case 102: { +#line 528 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2878 "xmlparse.cpp" +} break; +case 103: { +#line 529 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2884 "xmlparse.cpp" +} break; +case 104: { +#line 530 "xmlparse.kl" + (&redLel->user.inline_item_type)->inlineItem = (&rhs[0]->user.inline_item_type)->inlineItem; + +#line 2890 "xmlparse.cpp" +} break; +case 105: { +#line 560 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Text ); + (&redLel->user.inline_item_type)->inlineItem->data = (&rhs[2]->user.token)->tag->content; + + +#line 2899 "xmlparse.cpp" +} break; +case 106: { +#line 566 "xmlparse.kl" + + int targ = strtol( (&rhs[2]->user.token)->tag->content, 0, 10 ); + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Goto ); + (&redLel->user.inline_item_type)->inlineItem->targId = targ; + + +#line 2909 "xmlparse.cpp" +} break; +case 107: { +#line 573 "xmlparse.kl" + + int targ = strtol( (&rhs[2]->user.token)->tag->content, 0, 10 ); + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Call ); + (&redLel->user.inline_item_type)->inlineItem->targId = targ; + + +#line 2919 "xmlparse.cpp" +} break; +case 108: { +#line 580 "xmlparse.kl" + + int targ = strtol( (&rhs[2]->user.token)->tag->content, 0, 10 ); + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Next ); + (&redLel->user.inline_item_type)->inlineItem->targId = targ; + + +#line 2929 "xmlparse.cpp" +} break; +case 109: { +#line 587 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::GotoExpr ); + (&redLel->user.inline_item_type)->inlineItem->children = 
(&rhs[1]->user.inline_list)->inlineList; + + +#line 2938 "xmlparse.cpp" +} break; +case 110: { +#line 593 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::CallExpr ); + (&redLel->user.inline_item_type)->inlineItem->children = (&rhs[1]->user.inline_list)->inlineList; + + +#line 2947 "xmlparse.cpp" +} break; +case 111: { +#line 599 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::NextExpr ); + (&redLel->user.inline_item_type)->inlineItem->children = (&rhs[1]->user.inline_list)->inlineList; + + +#line 2956 "xmlparse.cpp" +} break; +case 112: { +#line 605 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Ret ); + + +#line 2964 "xmlparse.cpp" +} break; +case 113: { +#line 610 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Break ); + + +#line 2972 "xmlparse.cpp" +} break; +case 114: { +#line 615 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::PChar ); + + +#line 2980 "xmlparse.cpp" +} break; +case 115: { +#line 620 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Char ); + + +#line 2988 "xmlparse.cpp" +} break; +case 116: { +#line 625 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Hold ); + + +#line 2996 "xmlparse.cpp" +} break; +case 117: { +#line 630 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Exec ); + (&redLel->user.inline_item_type)->inlineItem->children = (&rhs[1]->user.inline_list)->inlineList; + + +#line 3005 "xmlparse.cpp" +} break; +case 118: { +#line 636 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::HoldTE ); + + +#line 3013 "xmlparse.cpp" +} break; 
+case 119: { +#line 641 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::ExecTE ); + (&redLel->user.inline_item_type)->inlineItem->children = (&rhs[1]->user.inline_list)->inlineList; + + +#line 3022 "xmlparse.cpp" +} break; +case 120: { +#line 647 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Curs ); + + +#line 3030 "xmlparse.cpp" +} break; +case 121: { +#line 652 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Targs ); + + +#line 3038 "xmlparse.cpp" +} break; +case 122: { +#line 657 "xmlparse.kl" + + int targ = strtol( (&rhs[2]->user.token)->tag->content, 0, 10 ); + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::Entry ); + (&redLel->user.inline_item_type)->inlineItem->targId = targ; + + +#line 3048 "xmlparse.cpp" +} break; +case 123: { +#line 664 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::LmInitTokStart ); + + +#line 3056 "xmlparse.cpp" +} break; +case 124: { +#line 669 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::LmInitAct ); + + +#line 3064 "xmlparse.cpp" +} break; +case 125: { +#line 674 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::LmGetTokEnd ); + + +#line 3072 "xmlparse.cpp" +} break; +case 126: { +#line 679 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::LmSetTokStart ); + cgd->hasLongestMatch = true; + + +#line 3081 "xmlparse.cpp" +} break; +case 127: { +#line 685 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ); + (&redLel->user.inline_item_type)->inlineItem->offset = strtol( (&rhs[2]->user.token)->tag->content, 0, 10 ); + + +#line 3090 
"xmlparse.cpp" +} break; +case 128: { +#line 691 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::LmSetActId ); + (&redLel->user.inline_item_type)->inlineItem->lmId = strtol( (&rhs[2]->user.token)->tag->content, 0, 10 ); + + +#line 3099 "xmlparse.cpp" +} break; +case 129: { +#line 697 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::SubAction ); + (&redLel->user.inline_item_type)->inlineItem->children = (&rhs[1]->user.inline_list)->inlineList; + + +#line 3108 "xmlparse.cpp" +} break; +case 130: { +#line 704 "xmlparse.kl" + + bool handlesError = false; + Attribute *handlesErrorAttr = (&rhs[0]->user.token)->tag->findAttr( "handles_error" ); + if ( handlesErrorAttr != 0 ) + handlesError = true; + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::LmSwitch ); + (&redLel->user.inline_item_type)->inlineItem->children = (&rhs[1]->user.lm_action_list)->inlineList; + (&redLel->user.inline_item_type)->inlineItem->handlesError = handlesError; + + +#line 3123 "xmlparse.cpp" +} break; +case 131: { +#line 721 "xmlparse.kl" + + (&redLel->user.lm_action_list)->inlineList = (&rhs[0]->user.lm_action_list)->inlineList; + (&redLel->user.lm_action_list)->inlineList->append( (&rhs[1]->user.inline_item_type)->inlineItem ); + + +#line 3132 "xmlparse.cpp" +} break; +case 132: { +#line 726 "xmlparse.kl" + + (&redLel->user.lm_action_list)->inlineList = new InlineList; + + +#line 3140 "xmlparse.cpp" +} break; +case 133: { +#line 733 "xmlparse.kl" + + (&redLel->user.inline_item_type)->inlineItem = new InlineItem( InputLoc(), InlineItem::SubAction ); + (&redLel->user.inline_item_type)->inlineItem->children = (&rhs[1]->user.inline_list)->inlineList; + + Attribute *idAttr = (&rhs[0]->user.token)->tag->findAttr( "id" ); + if ( idAttr != 0 ) { + unsigned long id = strtoul( idAttr->value, 0, 10 ); + (&redLel->user.inline_item_type)->inlineItem->lmId = 
id; + } + + +#line 3155 "xmlparse.cpp" +} break; +case 135: { +#line 752 "xmlparse.kl" + + Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) { + error((&rhs[0]->user.token)->loc) << "tag <action_table_list> requires " + "a length attribute" << endl; + } + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + cgd->initActionTableList( length ); + curActionTable = 0; + } + + +#line 3172 "xmlparse.cpp" +} break; +case 138: { +#line 769 "xmlparse.kl" + + /* Find the length of the action table. */ + Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) + error((&rhs[0]->user.token)->loc) << "tag <at> requires a length attribute" << endl; + else { + unsigned long length = strtoul( lengthAttr->value, 0, 10 ); + + /* Collect the action table. */ + RedAction *redAct = cgd->allActionTables + curActionTable; + redAct->actListId = curActionTable; + redAct->key.setAsNew( length ); + char *ptr = (&rhs[2]->user.token)->tag->content; + int pos = 0; + while ( *ptr != 0 ) { + unsigned long actionId = strtoul( ptr, &ptr, 10 ); + redAct->key[pos].key = 0; + redAct->key[pos].value = cgd->allActions+actionId; + pos += 1; + } + + /* Insert into the action table map. 
*/ + cgd->redFsm->actionMap.insert( redAct ); + } + + curActionTable += 1; + + +#line 3204 "xmlparse.cpp" +} break; +case 140: { +#line 804 "xmlparse.kl" + + Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" ); + if ( lengthAttr == 0 ) { + error((&rhs[0]->user.token)->loc) << "tag <cond_space_list> " + "requires a length attribute" << endl; + } + else { + ulong length = readLength( lengthAttr->value ); + cgd->initCondSpaceList( length ); + curCondSpace = 0; + } + + +#line 3221 "xmlparse.cpp" +} break; +case 143: { +#line 821 "xmlparse.kl" + + Attribute *lengthAttr = (&rhs[0]->user.token)->tag->findAttr( "length" ); + Attribute *idAttr = (&rhs[0]->user.token)->tag->findAttr( "id" ); + if ( lengthAttr == 0 ) + error((&rhs[0]->user.token)->loc) << "tag <cond_space> requires a length attribute" << endl; + else { + if ( lengthAttr == 0 ) + error((&rhs[0]->user.token)->loc) << "tag <cond_space> requires an id attribute" << endl; + else { + unsigned long condSpaceId = strtoul( idAttr->value, 0, 10 ); + ulong length = readLength( lengthAttr->value ); + + char *td = (&rhs[2]->user.token)->tag->content; + Key baseKey = readKey( td, &td ); + + cgd->newCondSpace( curCondSpace, condSpaceId, baseKey ); + for ( ulong a = 0; a < length; a++ ) { + long actionOffset = readOffsetPtr( td, &td ); + cgd->condSpaceItem( curCondSpace, actionOffset ); + } + curCondSpace += 1; + } + } + + +#line 3250 "xmlparse.cpp" +} break; +} + } +} + + if ( lel->child != 0 ) { + struct Parser_LangEl *first = lel->child; + struct Parser_LangEl *child = lel->child; + numNodes -= 1; + lel->child = 0; + while ( child->next != 0 ) { + child = child->next; + numNodes -= 1; + } + child->next = pool; + pool = first; + } + } + +hit_final: + if ( sp > 0 ) { + /* Figure out which place to return to. 
*/ + if ( cmStack[sp-1]->next == lel ) { + lel = cmStack[--sp]; + goto final_reverse; + } + else { + lel = cmStack[--sp]; + goto final_upwards; + } + } + + lastFinal = lel; + free( cmStack ); + } + } + } + + if ( *action & 0x2 ) { + int fssRed = *action >> 2; + int reduction = Parser_fssProdIdIndex[fssRed]; + struct Parser_LangEl *redLel; + if ( pool == 0 ) { + if ( freshPos == 8128 ) { + freshEl = (struct Parser_LangEl*) malloc( + sizeof(struct Parser_LangEl)*8128); + #ifdef LOG_ACTIONS + cerr << "allocating 8128 LangEls" << endl; + #endif + freshPos = 0; + } + redLel = freshEl + freshPos++; + } + else { + redLel = pool; + pool = pool->next; + } + numNodes += 1; + redLel->type = Parser_prodLhsIds[reduction]; + redLel->reduction = reduction; + redLel->child = 0; + redLel->next = 0; + redLel->retry = (lel->retry << 16); + lel->retry &= 0xffff0000; + + rhsLen = Parser_fssProdLengths[fssRed]; + if ( rhsLen > 0 ) { + int r; + for ( r = rhsLen-1; r > 0; r-- ) { + rhs[r] = stackTop; + stackTop = stackTop->next; + } + rhs[0] = stackTop; + stackTop = stackTop->next; + rhs[0]->next = 0; + } + #ifdef LOG_ACTIONS + cerr << "reduced: " + << Parser_prodNames[reduction] + << " rhsLen: " << rhsLen; + #endif + if ( action[1] == 0 ) + redLel->retry = 0; + else { + redLel->retry += 0x10000; + numRetry += 1; + #ifdef LOG_ACTIONS + cerr << " retry: " << redLel; + #endif + } + + #ifdef LOG_ACTIONS + cerr << endl; + #endif + + if ( rhsLen == 0 ) { + redLel->file = lel->file; + redLel->line = lel->line; + targState = curs; + } + else { + redLel->child = rhs[rhsLen-1]; + redLel->file = rhs[0]->file; + redLel->line = rhs[0]->line; + targState = rhs[0]->state; + } + + if ( induceReject ) { + #ifdef LOG_ACTIONS + cerr << "error induced during reduction of " << + Parser_lelNames[redLel->type] << endl; + #endif + redLel->state = curs; + redLel->next = stackTop; + stackTop = redLel; + curs = targState; + goto parseError; + } + else { + redLel->next = input; + input = redLel; + } + } + + + curs 
= targState; + goto again; + +parseError: + #ifdef LOG_BACKTRACK + cerr << "hit error" << endl; + #endif + if ( numRetry > 0 ) { + while ( 1 ) { + struct Parser_LangEl *redLel = stackTop; + if ( stackTop->type < 186 ) { + #ifdef LOG_BACKTRACK + cerr << "backing up over terminal: " << + Parser_lelNames[stackTop->type] << endl; + #endif + stackTop = stackTop->next; + redLel->next = input; + input = redLel; + } + else { + #ifdef LOG_BACKTRACK + cerr << "backing up over non-terminal: " << + Parser_lelNames[stackTop->type] << endl; + #endif + stackTop = stackTop->next; + struct Parser_LangEl *first = redLel->child; + if ( first == 0 ) + rhsLen = 0; + else { + rhsLen = 1; + while ( first->next != 0 ) { + first = first->next; + rhsLen += 1; + } + first->next = stackTop; + stackTop = redLel->child; + + struct Parser_LangEl *rhsEl = stackTop; + int p = rhsLen; + while ( p > 0 ) { + rhs[--p] = rhsEl; + rhsEl = rhsEl->next; + } + } + redLel->next = pool; + pool = redLel; + numNodes -= 1; + } + + if ( redLel->retry > 0 ) { + #ifdef LOG_BACKTRACK + cerr << "found retry targ: " << redLel << endl; + #endif + numRetry -= 1; + #ifdef LOG_BACKTRACK + cerr << "found retry: " << redLel << endl; + #endif + if ( redLel->retry & 0x0000ffff ) + curs = input->state; + else { + input->retry = redLel->retry >> 16; + if ( stackTop->state < 0 ) + curs = Parser_startState; + else { + curs = Parser_targs[(int)Parser_indicies[Parser_offsets[stackTop->state] + (stackTop->type - Parser_keys[stackTop->state<<1])]]; + } + } + goto again; + } + } + } + curs = -1; + errCount += 1; +_out: {} +#line 861 "xmlparse.kl" + return errCount == 0 ? 
0 : -1; +} + + +unsigned long readLength( char *td ) +{ + return strtoul( td, 0, 10 ); +} + +Key readKey( char *td, char **end ) +{ + if ( keyOps->isSigned ) + return Key( strtol( td, end, 10 ) ); + else + return Key( strtoul( td, end, 10 ) ); +} + +long readOffsetPtr( char *td, char **end ) +{ + while ( *td == ' ' || *td == '\t' ) + td++; + + if ( *td == 'x' ) { + if ( end != 0 ) + *end = td + 1; + return -1; + } + + return strtol( td, end, 10 ); +} + +ostream &Parser::warning( const InputLoc &loc ) +{ + cerr << fileName << ":" << loc.line << ":" << loc.col << ": warning: "; + return cerr; +} + +ostream &Parser::error( const InputLoc &loc ) +{ + errCount += 1; + assert( fileName != 0 ); + cerr << fileName << ":" << loc.line << ":" << loc.col << ": "; + return cerr; +} + + +ostream &Parser::parser_error( int tokId, Token &token ) +{ + errCount += 1; + assert( fileName != 0 ); + cerr << fileName << ":" << token.loc.line << ":" << token.loc.col; + if ( token.tag != 0 ) { + if ( token.tag->tagId == 0 ) + cerr << ": at unknown tag"; + else + cerr << ": at tag <" << token.tag->tagId->name << ">"; + } + cerr << ": "; + + return cerr; +} + +int Parser::token( int tokenId, Token &tok ) +{ + int res = parseLangEl( tokenId, &tok ); + if ( res < 0 ) { + parser_error( tokenId, tok ) << "parse error" << endl; + exit(1); + } + return res; +} + +int Parser::token( int tokenId, int col, int line ) +{ + Token tok; + tok.loc.col = col; + tok.loc.line = line; + tok.tag = 0; + return token( tokenId, tok ); +} + +int Parser::token( XMLTag *tag, int col, int line ) +{ + Token tok; + tok.loc.col = col; + tok.loc.line = line; + tok.tag = tag; + + if ( tag->type == XMLTag::Close ) { + int res = token( '/', tok ); + if ( res < 0 ) + return res; + } + + tok.tag = tag; + return token( tag->tagId != 0 ? 
tag->tagId->id : TAG_unknown, tok ); +} diff --git a/contrib/tools/ragel5/redfsm/xmlparse.h b/contrib/tools/ragel5/redfsm/xmlparse.h new file mode 100644 index 0000000000..b51a7cd67a --- /dev/null +++ b/contrib/tools/ragel5/redfsm/xmlparse.h @@ -0,0 +1,228 @@ +/* Automatically generated by Kelbt from "xmlparse.kh". + * + * Parts of this file are copied from Kelbt source covered by the GNU + * GPL. As a special exception, you may use the parts of this file copied + * from Kelbt source without restriction. The remainder is derived from + * "xmlparse.kh" and inherits the copyright status of that file. + */ + +#line 1 "xmlparse.kh" +/* + * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _XMLPARSE_H +#define _XMLPARSE_H + +#include "vector.h" +#include "gendata.h" +#include <iostream> + +using std::ostream; + +struct AttrMarker /* Pointer/length markers for one attribute's id and value, as recorded by the scanner. */ +{ + char *id; + int idLen; + char *value; + int valueLen; +}; + +struct Attribute /* One attribute as id/value C strings; findAttr() matches on id with strcmp. */ +{ + char *id; + char *value; +}; + +typedef Vector<AttrMarker> AttrMkList; +typedef Vector<Attribute> AttrList; +struct XMLTagHashPair; + +struct XMLTag +{ + enum TagType { Open, Close }; + + XMLTag( XMLTagHashPair *tagId, TagType type ) : + tagId(tagId), type(type), + content(0), attrList(0) {} + + Attribute *findAttr(const char *id ) /* Returns the first attribute whose id equals `id`, or 0 when attrList is 0 or nothing matches. NOTE(review): strcmp is used here but this header does not include <string.h> itself; presumably it arrives via vector.h/gendata.h -- confirm. */ + { + if ( attrList != 0 ) { + for ( AttrList::Iter attr = *attrList; attr.lte(); attr++ ) { + if ( strcmp( id, attr->id ) == 0 ) + return attr; + } + } + return 0; + } + + XMLTagHashPair *tagId; + TagType type; + + /* Content is associated with closing tags. */ + char *content; + + /* Attribute lists are associated with opening tags. 
*/ + AttrList *attrList; +}; + + +struct XMLTagHashPair +{ + const char *name; + int id; +}; + +struct Token +{ + XMLTag *tag; + InputLoc loc; +}; + +struct InlineItem; +struct InlineList; + +struct LmSwitchVect; +struct LmSwitchAction; + +struct Parser +{ + #line 117 "xmlparse.kh" + + + #line 111 "xmlparse.h" + struct Parser_LangEl *freshEl; + int freshPos; + struct Parser_LangEl *pool; + int numRetry; + int numNodes; + struct Parser_LangEl *stackTop; + struct Parser_LangEl *lastFinal; + int errCount; + int curs; +#line 120 "xmlparse.kh" + + void init(); + int parseLangEl( int type, const Token *token ); + + Parser(const char *fileName, bool outputActive, bool wantComplete ) : + fileName(fileName), sourceFileName(0), outStream(0), + outputActive(outputActive), wantComplete(wantComplete), + cgd(0) { } + + int token( int tokenId, Token &token ); + int token( int tokenId, int col, int line ); + int token( XMLTag *tag, int col, int line ); + + /* Report an error encountered by the parser. */ + ostream &warning( const InputLoc &loc ); + ostream &error(); + ostream &error( const InputLoc &loc ); + ostream &parser_error( int tokId, Token &token ); + + /* The name of the root section, this does not change during an include. */ + const char *fileName; + char *sourceFileName; + ostream *outStream; + bool outputActive; + bool wantComplete; + + /* Collected during parsing. 
*/ + char *attrKey; + char *attrValue; + int curAction; + int curActionTable; + int curTrans; + int curState; + int curCondSpace; + int curStateCond; + + CodeGenData *cgd; + CodeGenMap codeGenMap; + + Vector <char*> writeOptions; +}; + +#line 164 "xmlparse.h" +#define TAG_unknown 128 +#define TAG_ragel 129 +#define TAG_ragel_def 130 +#define TAG_host 131 +#define TAG_state_list 132 +#define TAG_state 133 +#define TAG_trans_list 134 +#define TAG_t 135 +#define TAG_machine 136 +#define TAG_start_state 137 +#define TAG_error_state 138 +#define TAG_action_list 139 +#define TAG_action_table_list 140 +#define TAG_action 141 +#define TAG_action_table 142 +#define TAG_alphtype 143 +#define TAG_element 144 +#define TAG_getkey 145 +#define TAG_state_actions 146 +#define TAG_entry_points 147 +#define TAG_sub_action 148 +#define TAG_cond_space_list 149 +#define TAG_cond_space 150 +#define TAG_cond_list 151 +#define TAG_c 152 +#define TAG_exports 153 +#define TAG_ex 154 +#define TAG_text 155 +#define TAG_goto 156 +#define TAG_call 157 +#define TAG_next 158 +#define TAG_goto_expr 159 +#define TAG_call_expr 160 +#define TAG_next_expr 161 +#define TAG_ret 162 +#define TAG_pchar 163 +#define TAG_char 164 +#define TAG_hold 165 +#define TAG_exec 166 +#define TAG_holdte 167 +#define TAG_execte 168 +#define TAG_curs 169 +#define TAG_targs 170 +#define TAG_entry 171 +#define TAG_data 172 +#define TAG_lm_switch 173 +#define TAG_init_act 174 +#define TAG_set_act 175 +#define TAG_set_tokend 176 +#define TAG_get_tokend 177 +#define TAG_init_tokstart 178 +#define TAG_set_tokstart 179 +#define TAG_write 180 +#define TAG_curstate 181 +#define TAG_access 182 +#define TAG_break 183 +#define TAG_arg 184 +#define _eof 185 + +#line 163 "xmlparse.kh" + +int xml_parse( std::istream &input, const char *fileName, + bool outputActive, bool wantComplete ); + +#endif /* _XMLPARSE_H */ diff --git a/contrib/tools/ragel5/redfsm/xmlscan.cpp b/contrib/tools/ragel5/redfsm/xmlscan.cpp new file mode 100644 index 
0000000000..a3d979a0ff --- /dev/null +++ b/contrib/tools/ragel5/redfsm/xmlscan.cpp @@ -0,0 +1,925 @@ +#line 1 "xmlscan.rl" +/* + * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <iostream> +#include <string.h> +#include "vector.h" +#include "xmlparse.h" +#include "buffer.h" + +using std::istream; +using std::cout; +using std::cerr; +using std::endl; + +#define BUFSIZE 4096 + + +#line 37 "xmlscan.cpp" +static const int Scanner_start = 20; + +static const int Scanner_first_final = 20; + +static const int Scanner_error = 0; + +#line 37 "xmlscan.rl" + +#include "phash.h" + +struct Scanner +{ + Scanner(const char *fileName, istream &input ) : + fileName(fileName), + input(input), + curline(1), + curcol(1), + p(0), pe(0), + done(false), + data(0), data_len(0), + value(0) + { + +#line 69 "xmlscan.cpp" + { + cs = Scanner_start; + tokstart = 0; + tokend = 0; + act = 0; + } +#line 63 "xmlscan.rl" + + } + + int scan(); + void adjustAttrPointers( int distance ); + std::ostream &error(); + + const char *fileName; + istream &input; + + /* Scanner State. 
*/ + int cs, act, have, curline, curcol; + char *tokstart, *tokend; + char *p, *pe; + int done; + + /* Token data */ + char *data; + int data_len; + int value; + AttrMkList attrMkList; + Buffer buffer; + char *tag_id_start; + int tag_id_len; + int token_col, token_line; + + char buf[BUFSIZE]; +}; + + +#define TK_NO_TOKEN (-1) +#define TK_ERR 1 +#define TK_SPACE 2 +#define TK_EOF 3 +#define TK_OpenTag 4 +#define TK_CloseTag 5 + +#define ret_tok( _tok ) token = (_tok); data = tokstart + +void Scanner::adjustAttrPointers( int distance ) +{ + for ( AttrMkList::Iter attr = attrMkList; attr.lte(); attr++ ) { + attr->id -= distance; + attr->value -= distance; + } +} + +/* There is no claim that this is a proper XML parser, but it is good + * enough for our purposes. */ +#line 178 "xmlscan.rl" + + +int Scanner::scan( ) +{ + int token = TK_NO_TOKEN; + int space = 0, readlen = 0; + char *attr_id_start = 0; + char *attr_value_start = 0; + int attr_id_len = 0; + int attr_value_len = 0; + + attrMkList.empty(); + buffer.clear(); + + while ( 1 ) { + if ( p == pe ) { + //printf("scanner: need more data\n"); + + if ( tokstart == 0 ) + have = 0; + else { + /* There is data that needs to be shifted over. */ + //printf("scanner: buffer broken mid token\n"); + have = pe - tokstart; + memmove( buf, tokstart, have ); + + int distance = tokstart - buf; + tokend -= distance; + tag_id_start -= distance; + attr_id_start -= distance; + attr_value_start -= distance; + adjustAttrPointers( distance ); + tokstart = buf; + } + + p = buf + have; + space = BUFSIZE - have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. 
*/ + return TK_SPACE; + } + + if ( done ) { + //printf("scanner: end of file\n"); + p[0] = 0; + readlen = 1; + } + else { + input.read( p, space ); + readlen = input.gcount(); + if ( input.eof() ) { + //printf("scanner: setting done flag\n"); + done = 1; + } + } + + pe = p + readlen; + } + + +#line 188 "xmlscan.cpp" + { + if ( p == pe ) + goto _out; + switch ( cs ) + { +tr6: +#line 115 "xmlscan.rl" + { curcol++; } +#line 168 "xmlscan.rl" + {tokend = p+1;{ buffer.append( '&' ); }{p = ((tokend))-1;}} + goto st20; +tr8: +#line 115 "xmlscan.rl" + { curcol++; } +#line 172 "xmlscan.rl" + {tokend = p+1;{ buffer.append( '>' ); }{p = ((tokend))-1;}} + goto st20; +tr10: +#line 115 "xmlscan.rl" + { curcol++; } +#line 170 "xmlscan.rl" + {tokend = p+1;{ buffer.append( '<' ); }{p = ((tokend))-1;}} + goto st20; +tr20: +#line 150 "xmlscan.rl" + { tag_id_len = p - tag_id_start; } +#line 115 "xmlscan.rl" + { curcol++; } +#line 160 "xmlscan.rl" + {tokend = p+1;{ ret_tok( TK_CloseTag ); {{p = ((tokend))-1;}goto _out20;} }{p = ((tokend))-1;}} + goto st20; +tr23: +#line 115 "xmlscan.rl" + { curcol++; } +#line 160 "xmlscan.rl" + {tokend = p+1;{ ret_tok( TK_CloseTag ); {{p = ((tokend))-1;}goto _out20;} }{p = ((tokend))-1;}} + goto st20; +tr27: +#line 150 "xmlscan.rl" + { tag_id_len = p - tag_id_start; } +#line 115 "xmlscan.rl" + { curcol++; } +#line 157 "xmlscan.rl" + {tokend = p+1;{ ret_tok( TK_OpenTag ); {{p = ((tokend))-1;}goto _out20;} }{p = ((tokend))-1;}} + goto st20; +tr30: +#line 115 "xmlscan.rl" + { curcol++; } +#line 157 "xmlscan.rl" + {tokend = p+1;{ ret_tok( TK_OpenTag ); {{p = ((tokend))-1;}goto _out20;} }{p = ((tokend))-1;}} + goto st20; +tr46: +#line 132 "xmlscan.rl" + { + attr_value_len = p - attr_value_start; + + AttrMarker newAttr; + newAttr.id = attr_id_start; + newAttr.idLen = attr_id_len; + newAttr.value = attr_value_start; + newAttr.valueLen = attr_value_len; + attrMkList.append( newAttr ); + } +#line 115 "xmlscan.rl" + { curcol++; } +#line 157 "xmlscan.rl" + {tokend 
= p+1;{ ret_tok( TK_OpenTag ); {{p = ((tokend))-1;}goto _out20;} }{p = ((tokend))-1;}} + goto st20; +tr48: +#line 115 "xmlscan.rl" + { curcol++; } +#line 164 "xmlscan.rl" + {tokend = p+1;{ buffer.append( *p ); }{p = ((tokend))-1;}} + goto st20; +tr49: +#line 116 "xmlscan.rl" + { token_col = curcol; token_line = curline; } +#line 175 "xmlscan.rl" + {tokend = p+1;{ ret_tok( TK_EOF ); {{p = ((tokend))-1;}goto _out20;} }{p = ((tokend))-1;}} + goto st20; +tr50: +#line 117 "xmlscan.rl" + { curcol = 0; curline++; } +#line 115 "xmlscan.rl" + { curcol++; } +#line 164 "xmlscan.rl" + {tokend = p+1;{ buffer.append( *p ); }{p = ((tokend))-1;}} + goto st20; +st20: +#line 1 "xmlscan.rl" + {tokstart = 0;} + if ( ++p == pe ) + goto _out20; +case 20: +#line 1 "xmlscan.rl" + {tokstart = p;} +#line 285 "xmlscan.cpp" + switch( (*p) ) { + case 0: goto tr49; + case 10: goto tr50; + case 38: goto tr51; + case 60: goto tr52; + } + goto tr48; +tr51: +#line 115 "xmlscan.rl" + { curcol++; } + goto st1; +st1: + if ( ++p == pe ) + goto _out1; +case 1: +#line 301 "xmlscan.cpp" + switch( (*p) ) { + case 97: goto tr0; + case 103: goto tr2; + case 108: goto tr3; + } + goto st0; +st0: + goto _out0; +tr0: +#line 115 "xmlscan.rl" + { curcol++; } + goto st2; +st2: + if ( ++p == pe ) + goto _out2; +case 2: +#line 318 "xmlscan.cpp" + if ( (*p) == 109 ) + goto tr4; + goto st0; +tr4: +#line 115 "xmlscan.rl" + { curcol++; } + goto st3; +st3: + if ( ++p == pe ) + goto _out3; +case 3: +#line 330 "xmlscan.cpp" + if ( (*p) == 112 ) + goto tr5; + goto st0; +tr5: +#line 115 "xmlscan.rl" + { curcol++; } + goto st4; +st4: + if ( ++p == pe ) + goto _out4; +case 4: +#line 342 "xmlscan.cpp" + if ( (*p) == 59 ) + goto tr6; + goto st0; +tr2: +#line 115 "xmlscan.rl" + { curcol++; } + goto st5; +st5: + if ( ++p == pe ) + goto _out5; +case 5: +#line 354 "xmlscan.cpp" + if ( (*p) == 116 ) + goto tr7; + goto st0; +tr7: +#line 115 "xmlscan.rl" + { curcol++; } + goto st6; +st6: + if ( ++p == pe ) + goto _out6; +case 6: +#line 
366 "xmlscan.cpp" + if ( (*p) == 59 ) + goto tr8; + goto st0; +tr3: +#line 115 "xmlscan.rl" + { curcol++; } + goto st7; +st7: + if ( ++p == pe ) + goto _out7; +case 7: +#line 378 "xmlscan.cpp" + if ( (*p) == 116 ) + goto tr9; + goto st0; +tr9: +#line 115 "xmlscan.rl" + { curcol++; } + goto st8; +st8: + if ( ++p == pe ) + goto _out8; +case 8: +#line 390 "xmlscan.cpp" + if ( (*p) == 59 ) + goto tr10; + goto st0; +tr11: +#line 115 "xmlscan.rl" + { curcol++; } + goto st9; +tr12: +#line 117 "xmlscan.rl" + { curcol = 0; curline++; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st9; +tr52: +#line 116 "xmlscan.rl" + { token_col = curcol; token_line = curline; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st9; +st9: + if ( ++p == pe ) + goto _out9; +case 9: +#line 414 "xmlscan.cpp" + switch( (*p) ) { + case 9: goto tr11; + case 10: goto tr12; + case 13: goto tr11; + case 32: goto tr11; + case 47: goto tr13; + case 95: goto tr14; + } + if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr14; + } else if ( (*p) >= 65 ) + goto tr14; + goto st0; +tr13: +#line 115 "xmlscan.rl" + { curcol++; } + goto st10; +tr15: +#line 117 "xmlscan.rl" + { curcol = 0; curline++; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st10; +st10: + if ( ++p == pe ) + goto _out10; +case 10: +#line 443 "xmlscan.cpp" + switch( (*p) ) { + case 9: goto tr13; + case 10: goto tr15; + case 13: goto tr13; + case 32: goto tr13; + case 95: goto tr16; + } + if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr16; + } else if ( (*p) >= 65 ) + goto tr16; + goto st0; +tr19: +#line 115 "xmlscan.rl" + { curcol++; } + goto st11; +tr16: +#line 149 "xmlscan.rl" + { tag_id_start = p; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st11; +st11: + if ( ++p == pe ) + goto _out11; +case 11: +#line 471 "xmlscan.cpp" + switch( (*p) ) { + case 9: goto tr17; + case 10: goto tr18; + case 13: goto tr17; + case 32: goto tr17; + case 62: goto tr20; + case 95: goto tr19; + } + if ( (*p) < 65 ) { + if ( 48 <= 
(*p) && (*p) <= 57 ) + goto tr19; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr19; + } else + goto tr19; + goto st0; +tr21: +#line 115 "xmlscan.rl" + { curcol++; } + goto st12; +tr22: +#line 117 "xmlscan.rl" + { curcol = 0; curline++; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st12; +tr17: +#line 150 "xmlscan.rl" + { tag_id_len = p - tag_id_start; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st12; +tr18: +#line 150 "xmlscan.rl" + { tag_id_len = p - tag_id_start; } +#line 117 "xmlscan.rl" + { curcol = 0; curline++; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st12; +st12: + if ( ++p == pe ) + goto _out12; +case 12: +#line 517 "xmlscan.cpp" + switch( (*p) ) { + case 9: goto tr21; + case 10: goto tr22; + case 13: goto tr21; + case 32: goto tr21; + case 62: goto tr23; + } + goto st0; +tr26: +#line 115 "xmlscan.rl" + { curcol++; } + goto st13; +tr14: +#line 149 "xmlscan.rl" + { tag_id_start = p; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st13; +st13: + if ( ++p == pe ) + goto _out13; +case 13: +#line 540 "xmlscan.cpp" + switch( (*p) ) { + case 9: goto tr24; + case 10: goto tr25; + case 13: goto tr24; + case 32: goto tr24; + case 62: goto tr27; + case 95: goto tr26; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr26; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr26; + } else + goto tr26; + goto st0; +tr28: +#line 115 "xmlscan.rl" + { curcol++; } + goto st14; +tr29: +#line 117 "xmlscan.rl" + { curcol = 0; curline++; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st14; +tr24: +#line 150 "xmlscan.rl" + { tag_id_len = p - tag_id_start; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st14; +tr25: +#line 150 "xmlscan.rl" + { tag_id_len = p - tag_id_start; } +#line 117 "xmlscan.rl" + { curcol = 0; curline++; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st14; +tr44: +#line 132 "xmlscan.rl" + { + attr_value_len = p - attr_value_start; + + AttrMarker newAttr; + newAttr.id = 
attr_id_start; + newAttr.idLen = attr_id_len; + newAttr.value = attr_value_start; + newAttr.valueLen = attr_value_len; + attrMkList.append( newAttr ); + } +#line 115 "xmlscan.rl" + { curcol++; } + goto st14; +tr45: +#line 132 "xmlscan.rl" + { + attr_value_len = p - attr_value_start; + + AttrMarker newAttr; + newAttr.id = attr_id_start; + newAttr.idLen = attr_id_len; + newAttr.value = attr_value_start; + newAttr.valueLen = attr_value_len; + attrMkList.append( newAttr ); + } +#line 117 "xmlscan.rl" + { curcol = 0; curline++; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st14; +st14: + if ( ++p == pe ) + goto _out14; +case 14: +#line 618 "xmlscan.cpp" + switch( (*p) ) { + case 9: goto tr28; + case 10: goto tr29; + case 13: goto tr28; + case 32: goto tr28; + case 62: goto tr30; + case 95: goto tr31; + } + if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr31; + } else if ( (*p) >= 65 ) + goto tr31; + goto st0; +tr34: +#line 115 "xmlscan.rl" + { curcol++; } + goto st15; +tr31: +#line 124 "xmlscan.rl" + { attr_id_start = p; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st15; +tr47: +#line 132 "xmlscan.rl" + { + attr_value_len = p - attr_value_start; + + AttrMarker newAttr; + newAttr.id = attr_id_start; + newAttr.idLen = attr_id_len; + newAttr.value = attr_value_start; + newAttr.valueLen = attr_value_len; + attrMkList.append( newAttr ); + } +#line 124 "xmlscan.rl" + { attr_id_start = p; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st15; +st15: + if ( ++p == pe ) + goto _out15; +case 15: +#line 664 "xmlscan.cpp" + switch( (*p) ) { + case 9: goto tr32; + case 10: goto tr33; + case 13: goto tr32; + case 32: goto tr32; + case 61: goto tr35; + case 95: goto tr34; + } + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr34; + } else if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr34; + } else + goto tr34; + goto st0; +tr36: +#line 115 "xmlscan.rl" + { curcol++; } + goto st16; +tr37: +#line 117 "xmlscan.rl" + { curcol = 0; 
curline++; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st16; +tr32: +#line 125 "xmlscan.rl" + { attr_id_len = p - attr_id_start; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st16; +tr33: +#line 125 "xmlscan.rl" + { attr_id_len = p - attr_id_start; } +#line 117 "xmlscan.rl" + { curcol = 0; curline++; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st16; +st16: + if ( ++p == pe ) + goto _out16; +case 16: +#line 710 "xmlscan.cpp" + switch( (*p) ) { + case 9: goto tr36; + case 10: goto tr37; + case 13: goto tr36; + case 32: goto tr36; + case 61: goto tr38; + } + goto st0; +tr38: +#line 115 "xmlscan.rl" + { curcol++; } + goto st17; +tr39: +#line 117 "xmlscan.rl" + { curcol = 0; curline++; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st17; +tr35: +#line 125 "xmlscan.rl" + { attr_id_len = p - attr_id_start; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st17; +st17: + if ( ++p == pe ) + goto _out17; +case 17: +#line 739 "xmlscan.cpp" + switch( (*p) ) { + case 9: goto tr38; + case 10: goto tr39; + case 13: goto tr38; + case 32: goto tr38; + case 34: goto tr40; + } + goto st0; +tr41: +#line 115 "xmlscan.rl" + { curcol++; } + goto st18; +tr42: +#line 117 "xmlscan.rl" + { curcol = 0; curline++; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st18; +tr40: +#line 130 "xmlscan.rl" + { attr_value_start = p; } +#line 115 "xmlscan.rl" + { curcol++; } + goto st18; +st18: + if ( ++p == pe ) + goto _out18; +case 18: +#line 768 "xmlscan.cpp" + switch( (*p) ) { + case 10: goto tr42; + case 34: goto tr43; + } + goto tr41; +tr43: +#line 115 "xmlscan.rl" + { curcol++; } + goto st19; +st19: + if ( ++p == pe ) + goto _out19; +case 19: +#line 782 "xmlscan.cpp" + switch( (*p) ) { + case 9: goto tr44; + case 10: goto tr45; + case 13: goto tr44; + case 32: goto tr44; + case 62: goto tr46; + case 95: goto tr47; + } + if ( (*p) > 90 ) { + if ( 97 <= (*p) && (*p) <= 122 ) + goto tr47; + } else if ( (*p) >= 65 ) + goto tr47; + goto st0; + } + _out20: cs = 20; goto _out; + _out1: cs 
= 1; goto _out; + _out0: cs = 0; goto _out; + _out2: cs = 2; goto _out; + _out3: cs = 3; goto _out; + _out4: cs = 4; goto _out; + _out5: cs = 5; goto _out; + _out6: cs = 6; goto _out; + _out7: cs = 7; goto _out; + _out8: cs = 8; goto _out; + _out9: cs = 9; goto _out; + _out10: cs = 10; goto _out; + _out11: cs = 11; goto _out; + _out12: cs = 12; goto _out; + _out13: cs = 13; goto _out; + _out14: cs = 14; goto _out; + _out15: cs = 15; goto _out; + _out16: cs = 16; goto _out; + _out17: cs = 17; goto _out; + _out18: cs = 18; goto _out; + _out19: cs = 19; goto _out; + + _out: {} + } +#line 239 "xmlscan.rl" + + if ( cs == Scanner_error ) + return TK_ERR; + + if ( token != TK_NO_TOKEN ) { + /* fbreak does not advance p, so we do it manually. */ + p = p + 1; + data_len = p - data; + return token; + } + } +} + +int xml_parse( std::istream &input, const char *fileName, + bool outputActive, bool wantComplete ) +{ + Scanner scanner( fileName, input ); + Parser parser( fileName, outputActive, wantComplete ); + + parser.init(); + + while ( 1 ) { + int token = scanner.scan(); + if ( token == TK_NO_TOKEN ) { + cerr << "xmlscan: interal error: scanner returned NO_TOKEN" << endl; + exit(1); + } + else if ( token == TK_EOF ) { + parser.token( _eof, scanner.token_col, scanner.token_line ); + break; + } + else if ( token == TK_ERR ) { + scanner.error() << "scanner error" << endl; + break; + } + else if ( token == TK_SPACE ) { + scanner.error() << "scanner is out of buffer space" << endl; + break; + } + else { + /* All other tokens are either open or close tags. */ + XMLTagHashPair *tagId = Perfect_Hash::in_word_set( + scanner.tag_id_start, scanner.tag_id_len ); + + XMLTag *tag = new XMLTag( tagId, token == TK_OpenTag ? + XMLTag::Open : XMLTag::Close ); + + if ( tagId != 0 ) { + /* Get attributes for open tags. 
*/ + if ( token == TK_OpenTag && scanner.attrMkList.length() > 0 ) { + tag->attrList = new AttrList; + for ( AttrMkList::Iter attr = scanner.attrMkList; + attr.lte(); attr++ ) + { + Attribute newAttr; + newAttr.id = new char[attr->idLen+1]; + memcpy( newAttr.id, attr->id, attr->idLen ); + newAttr.id[attr->idLen] = 0; + + /* Exclude the surrounding quotes. */ + newAttr.value = new char[attr->valueLen-1]; + memcpy( newAttr.value, attr->value+1, attr->valueLen-2 ); + newAttr.value[attr->valueLen-2] = 0; + + tag->attrList->append( newAttr ); + } + } + + /* Get content for closing tags. */ + if ( token == TK_CloseTag ) { + switch ( tagId->id ) { + case TAG_host: case TAG_arg: + case TAG_t: case TAG_alphtype: + case TAG_text: case TAG_goto: + case TAG_call: case TAG_next: + case TAG_entry: case TAG_set_tokend: + case TAG_set_act: case TAG_start_state: + case TAG_error_state: case TAG_state_actions: + case TAG_action_table: case TAG_cond_space: + case TAG_c: case TAG_ex: + tag->content = new char[scanner.buffer.length+1]; + memcpy( tag->content, scanner.buffer.data, + scanner.buffer.length ); + tag->content[scanner.buffer.length] = 0; + break; + } + } + } + + #if 0 + cerr << "parser_driver: " << (tag->type == XMLTag::Open ? "open" : "close") << + ": " << (tag->tagId != 0 ? 
tag->tagId->name : "<unknown>") << endl; + if ( tag->attrList != 0 ) { + for ( AttrList::Iter attr = *tag->attrList; attr.lte(); attr++ ) + cerr << " " << attr->id << ": " << attr->value << endl; + } + if ( tag->content != 0 ) + cerr << " content: " << tag->content << endl; + #endif + + parser.token( tag, scanner.token_col, scanner.token_line ); + } + } + + return 0; +} + +std::ostream &Scanner::error() +{ + cerr << fileName << ":" << curline << ":" << curcol << ": "; + return cerr; +} diff --git a/contrib/tools/ragel5/redfsm/xmltags.cpp b/contrib/tools/ragel5/redfsm/xmltags.cpp new file mode 100644 index 0000000000..5fbfabab1d --- /dev/null +++ b/contrib/tools/ragel5/redfsm/xmltags.cpp @@ -0,0 +1,244 @@ +/* C++ code produced by gperf version 3.0.1 */ +/* Command-line: gperf -L C++ -t xmltags.gperf */ +/* Computed positions: -k'1,3' */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' 
== 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>." 
/* gperf-generated hash function for the XML tag keywords.
 *
 * The value starts at len. For any len other than 1 or 2 the association
 * value of the third character (str[2]) is added, and in all cases the
 * association value of the first character (str[0]) is added. The caller
 * must therefore supply at least one readable character, and at least
 * three when len is neither 1 nor 2. */
inline unsigned int
Perfect_Hash::hash (register const char *str, register unsigned int len)
{
  /* Per-byte association values. Every entry that is not otherwise assigned
   * holds 85, which is MAX_HASH_VALUE + 1. */
  static const unsigned char asso_values[] =
    {
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 20, 85,  5, 41, 35,
       5, 35, 85, 15, 10,  0, 85, 85, 40,  0,
      15, 85, 40, 85, 25,  0, 10, 85, 85,  0,
      56, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
      85, 85, 85, 85, 85, 85
    };
  int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[2]];
      /*FALLTHROUGH*/
      case 2:
      case 1:
        hval += asso_values[(unsigned char)str[0]];
        break;
    }
  return hval;
}

/* Look up the first len characters of str in the generated keyword table.
 *
 * Returns a pointer to the matching wordlist entry, or 0 when len is outside
 * [MIN_WORD_LENGTH, MAX_WORD_LENGTH], the hash falls outside
 * [0, MAX_HASH_VALUE], or the text does not equal the name stored in the
 * hashed slot. str does not need to be NUL-terminated at len: the comparison
 * is bounded by len and the stored name is checked to end exactly there. */
struct XMLTagHashPair *
Perfect_Hash::in_word_set (register const char *str, register unsigned int len)
{
  /* Indexed directly by hash value; {""} marks a slot no keyword maps to. */
  static struct XMLTagHashPair wordlist[] =
    {
      {""}, {""}, {""}, {""}, {""},
#line 74 "xmltags.gperf"
      {"write", TAG_write},
      {""}, {""},
#line 68 "xmltags.gperf"
      {"init_act", TAG_init_act},
      {""},
#line 34 "xmltags.gperf"
      {"state", TAG_state},
#line 36 "xmltags.gperf"
      {"t", TAG_t},
      {""},
#line 72 "xmltags.gperf"
      {"init_tokstart", TAG_init_tokstart},
#line 32 "xmltags.gperf"
      {"host", TAG_host},
#line 33 "xmltags.gperf"
      {"state_list", TAG_state_list},
#line 38 "xmltags.gperf"
      {"start_state", TAG_start_state},
#line 69 "xmltags.gperf"
      {"set_act", TAG_set_act},
#line 46 "xmltags.gperf"
      {"state_actions", TAG_state_actions},
#line 65 "xmltags.gperf"
      {"data", TAG_data},
#line 71 "xmltags.gperf"
      {"set_tokend", TAG_set_tokend},
#line 41 "xmltags.gperf"
      {"action", TAG_action},
#line 73 "xmltags.gperf"
      {"set_tokstart", TAG_set_tokstart},
#line 78 "xmltags.gperf"
      {"arg", TAG_arg},
      {""},
#line 35 "xmltags.gperf"
      {"trans_list", TAG_trans_list},
#line 40 "xmltags.gperf"
      {"action_list", TAG_action_list},
#line 43 "xmltags.gperf"
      {"action_table", TAG_action_table},
      {""},
#line 49 "xmltags.gperf"
      {"goto", TAG_goto},
      {""},
#line 45 "xmltags.gperf"
      {"getkey", TAG_getkey},
#line 42 "xmltags.gperf"
      {"action_table_list", TAG_action_table_list},
      {""},
#line 52 "xmltags.gperf"
      {"goto_expr", TAG_goto_expr},
#line 70 "xmltags.gperf"
      {"get_tokend", TAG_get_tokend},
#line 82 "xmltags.gperf"
      {"c", TAG_c},
#line 84 "xmltags.gperf"
      {"ex", TAG_ex},
#line 55 "xmltags.gperf"
      {"ret", TAG_ret},
      {""},
#line 63 "xmltags.gperf"
      {"targs", TAG_targs},
      {""},
#line 37 "xmltags.gperf"
      {"machine", TAG_machine},
      {""},
#line 57 "xmltags.gperf"
      {"char", TAG_char},
#line 30 "xmltags.gperf"
      {"ragel", TAG_ragel},
#line 76 "xmltags.gperf"
      {"access", TAG_access},
      {""}, {""},
#line 31 "xmltags.gperf"
      {"ragel_def", TAG_ragel_def},
#line 64 "xmltags.gperf"
      {"entry", TAG_entry},
#line 67 "xmltags.gperf"
      {"sub_action", TAG_sub_action},
      {""},
#line 44 "xmltags.gperf"
      {"alphtype", TAG_alphtype},
#line 58 "xmltags.gperf"
      {"hold", TAG_hold},
#line 56 "xmltags.gperf"
      {"pchar", TAG_pchar},
#line 60 "xmltags.gperf"
      {"holdte", TAG_holdte},
#line 47 "xmltags.gperf"
      {"entry_points", TAG_entry_points},
      {""},
#line 81 "xmltags.gperf"
      {"cond_list", TAG_cond_list},
#line 80 "xmltags.gperf"
      {"cond_space", TAG_cond_space},
      {""}, {""}, {""},
#line 62 "xmltags.gperf"
      {"curs", TAG_curs},
#line 79 "xmltags.gperf"
      {"cond_space_list", TAG_cond_space_list},
      {""}, {""},
#line 75 "xmltags.gperf"
      {"curstate", TAG_curstate},
#line 66 "xmltags.gperf"
      {"lm_switch", TAG_lm_switch},
#line 48 "xmltags.gperf"
      {"text", TAG_text},
#line 39 "xmltags.gperf"
      {"error_state", TAG_error_state},
      {""}, {""},
#line 59 "xmltags.gperf"
      {"exec", TAG_exec},
#line 51 "xmltags.gperf"
      {"next", TAG_next},
#line 61 "xmltags.gperf"
      {"execte", TAG_execte},
      {""}, {""},
#line 50 "xmltags.gperf"
      {"call", TAG_call},
#line 54 "xmltags.gperf"
      {"next_expr", TAG_next_expr},
#line 77 "xmltags.gperf"
      {"break", TAG_break},
#line 83 "xmltags.gperf"
      {"exports", TAG_exports},
      {""},
#line 53 "xmltags.gperf"
      {"call_expr", TAG_call_expr}
    };

  /* Reject lengths no keyword can have before hashing. */
  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      int key = hash (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          const char *s = wordlist[key].name;

          /* First character, then the remaining len-1 characters, then make
           * sure the stored name is not longer than the candidate. */
          if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
            return &wordlist[key];
        }
    }
  return 0;
}
a/contrib/tools/ragel5/rlgen-cd/fflatcodegen.cpp b/contrib/tools/ragel5/rlgen-cd/fflatcodegen.cpp new file mode 100644 index 0000000000..813347fd2b --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/fflatcodegen.cpp @@ -0,0 +1,351 @@ +/* + * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Erich Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "rlgen-cd.h" +#include "fflatcodegen.h" +#include "redfsm.h" +#include "gendata.h" + +std::ostream &FFlatCodeGen::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->actListId+1; + out << act; + return out; +} + +std::ostream &FFlatCodeGen::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->actListId+1; + out << act; + return out; +} + +std::ostream &FFlatCodeGen::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->actListId+1; + out << act; + return out; +} + +/* Write out the function for a transition. 
*/ +std::ostream &FFlatCodeGen::TRANS_ACTION( RedTransAp *trans ) +{ + int action = 0; + if ( trans->action != 0 ) + action = trans->action->actListId+1; + out << action; + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &FFlatCodeGen::TO_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numToStateRefs > 0 ) { + /* Write the entry label. */ + out << "\tcase " << redAct->actListId+1 << ":\n"; + + /* Write each action in the list of action items. */ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, false ); + + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &FFlatCodeGen::FROM_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numFromStateRefs > 0 ) { + /* Write the entry label. */ + out << "\tcase " << redAct->actListId+1 << ":\n"; + + /* Write each action in the list of action items. */ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, false ); + + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +std::ostream &FFlatCodeGen::EOF_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numEofRefs > 0 ) { + /* Write the entry label. */ + out << "\tcase " << redAct->actListId+1 << ":\n"; + + /* Write each action in the list of action items. 
*/ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, true ); + + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &FFlatCodeGen::ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numTransRefs > 0 ) { + /* Write the entry label. */ + out << "\tcase " << redAct->actListId+1 << ":\n"; + + /* Write each action in the list of action items. */ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, false ); + + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +void FFlatCodeGen::writeData() +{ + if ( redFsm->anyConditions() ) { + OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() ); + COND_KEYS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondSpan), CSP() ); + COND_KEY_SPANS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCond), C() ); + CONDS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondIndexOffset), CO() ); + COND_INDEX_OFFSET(); + CLOSE_ARRAY() << + "\n"; + } + + OPEN_ARRAY( WIDE_ALPH_TYPE(), K() ); + KEYS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxSpan), SP() ); + KEY_SPANS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxFlatIndexOffset), IO() ); + FLAT_INDEX_OFFSET(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxIndex), I() ); + INDICIES(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxState), TT() ); + TRANS_TARGS(); + CLOSE_ARRAY() << + "\n"; + + if ( redFsm->anyActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActListId), TA() ); + TRANS_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( redFsm->anyToStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TSA() ); + 
TO_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( redFsm->anyFromStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), FSA() ); + FROM_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( redFsm->anyEofActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActListId), EA() ); + EOF_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + STATE_IDS(); +} + +void FFlatCodeGen::writeExec() +{ + outLabelUsed = false; + + out << + " {\n" + " int _slen"; + + if ( redFsm->anyRegCurStateRef() ) + out << ", _ps"; + + out << ";\n"; + out << " int _trans"; + + if ( redFsm->anyConditions() ) + out << ", _cond"; + + out << ";\n"; + + out << + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n" + " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxIndex) << POINTER() << "_inds;\n"; + + if ( redFsm->anyConditions() ) { + out << + " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxCond) << POINTER() << "_conds;\n" + " " << WIDE_ALPH_TYPE() << " _widec;\n"; + } + + if ( hasEnd ) { + outLabelUsed = true; + out << + " if ( " << P() << " == " << PE() << " )\n" + " goto _out;\n"; + } + + out << "_resume:\n"; + + if ( redFsm->errState != 0 ) { + outLabelUsed = true; + out << + " if ( " << CS() << " == " << redFsm->errState->id << " )\n" + " goto _out;\n"; + } + + if ( redFsm->anyFromStateActions() ) { + out << + " switch ( " << FSA() << "[" << CS() << "] ) {\n"; + FROM_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + "\n"; + } + + if ( redFsm->anyConditions() ) + COND_TRANSLATE(); + + LOCATE_TRANS(); + + if ( redFsm->anyRegCurStateRef() ) + out << " _ps = " << CS() << ";\n"; + + out << + " " << CS() << " = " << TT() << "[_trans];\n\n"; + + if ( redFsm->anyRegActions() ) { + out << + " if ( " << TA() << "[_trans] == 0 )\n" + " goto _again;\n" + "\n" + " switch ( " << TA() << "[_trans] ) {\n"; + ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + "\n"; + } + + if ( redFsm->anyRegActions() || redFsm->anyActionGotos() || + redFsm->anyActionCalls() || redFsm->anyActionRets() ) 
+ out << "_again:\n"; + + if ( redFsm->anyToStateActions() ) { + out << + " switch ( " << TSA() << "[" << CS() << "] ) {\n"; + TO_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + "\n"; + } + + if ( hasEnd ) { + out << + " if ( ++" << P() << " != " << PE() << " )\n" + " goto _resume;\n"; + } + else { + out << + " " << P() << " += 1;\n" + " goto _resume;\n"; + } + + if ( outLabelUsed ) + out << " _out: {}\n"; + + out << " }\n"; +} + +void FFlatCodeGen::writeEOF() +{ + if ( redFsm->anyEofActions() ) { + out << + " {\n" + " switch ( " << EA() << "[" << CS() << "] ) {\n"; + EOF_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + "\n"; + } +} diff --git a/contrib/tools/ragel5/rlgen-cd/fflatcodegen.h b/contrib/tools/ragel5/rlgen-cd/fflatcodegen.h new file mode 100644 index 0000000000..cf92fd9baf --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/fflatcodegen.h @@ -0,0 +1,76 @@ +/* + * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Erich Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FFLATCODEGEN_H +#define _FFLATCODEGEN_H + +#include <iostream> +#include "flatcodegen.h" + +/* Forwards. 
/*
 * FFlatCodeGen
 *
 * Code generator built on FlatCodeGen. It declares the four action switch
 * emitters and overrides the per-state and per-transition action emitters
 * together with the data, EOF and exec writers. Everything is protected;
 * the concrete generators below are what gets instantiated.
 */
class FFlatCodeGen : public FlatCodeGen
{
protected:
	/* FsmCodeGen is initialised here in addition to the direct base.
	 * NOTE(review): presumably FsmCodeGen is a virtual base - confirm in
	 * the FsmCodeGen/FlatCodeGen declarations. */
	FFlatCodeGen( ostream &out ) : FsmCodeGen(out), FlatCodeGen(out) {}

	/* Emit the case bodies of the corresponding action switch. */
	std::ostream &TO_STATE_ACTION_SWITCH();
	std::ostream &FROM_STATE_ACTION_SWITCH();
	std::ostream &EOF_ACTION_SWITCH();
	std::ostream &ACTION_SWITCH();

	/* Emit the action entry of a single state or transition. */
	virtual std::ostream &TO_STATE_ACTION( RedStateAp *state );
	virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state );
	virtual std::ostream &EOF_ACTION( RedStateAp *state );
	virtual std::ostream &TRANS_ACTION( RedTransAp *trans );

	/* Write the tables, the EOF block and the execution block. */
	virtual void writeData();
	virtual void writeEOF();
	virtual void writeExec();
};

/*
 * CFFlatCodeGen
 *
 * FFlatCodeGen combined with CCodeGen.
 */
struct CFFlatCodeGen
	: public FFlatCodeGen, public CCodeGen
{
	CFFlatCodeGen( ostream &out ) : 
		FsmCodeGen(out), FFlatCodeGen(out), CCodeGen(out) {}
};

/*
 * DFFlatCodeGen
 *
 * FFlatCodeGen combined with DCodeGen.
 */
struct DFFlatCodeGen
	: public FFlatCodeGen, public DCodeGen
{
	DFFlatCodeGen( ostream &out ) : 
		FsmCodeGen(out), FFlatCodeGen(out), DCodeGen(out) {}
};
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "rlgen-cd.h" +#include "fgotocodegen.h" +#include "redfsm.h" +#include "gendata.h" +#include "bstmap.h" + +std::ostream &FGotoCodeGen::EXEC_ACTIONS() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numTransRefs > 0 ) { + /* We are at the start of a glob, write the case. */ + out << "f" << redAct->actListId << ":\n"; + + /* Write each action in the list of action items. */ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, false ); + + out << "\tgoto _again;\n"; + } + } + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &FGotoCodeGen::TO_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numToStateRefs > 0 ) { + /* Write the entry label. */ + out << "\tcase " << redAct->actListId+1 << ":\n"; + + /* Write each action in the list of action items. */ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, false ); + + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &FGotoCodeGen::FROM_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numFromStateRefs > 0 ) { + /* Write the entry label. */ + out << "\tcase " << redAct->actListId+1 << ":\n"; + + /* Write each action in the list of action items. 
*/ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, false ); + + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +std::ostream &FGotoCodeGen::EOF_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numEofRefs > 0 ) { + /* Write the entry label. */ + out << "\tcase " << redAct->actListId+1 << ":\n"; + + /* Write each action in the list of action items. */ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, true ); + + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + + +std::ostream &FGotoCodeGen::FINISH_CASES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* States that are final and have an out action need a case. */ + if ( st->eofAction != 0 ) { + /* Write the case label. */ + out << "\t\tcase " << st->id << ": "; + + /* Jump to the func. 
*/ + out << "goto f" << st->eofAction->actListId << ";\n"; + } + } + + return out; +} + +unsigned int FGotoCodeGen::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->actListId+1; + return act; +} + +unsigned int FGotoCodeGen::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->actListId+1; + return act; +} + +unsigned int FGotoCodeGen::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->actListId+1; + return act; +} + +void FGotoCodeGen::writeData() +{ + if ( redFsm->anyToStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TSA() ); + TO_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( redFsm->anyFromStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), FSA() ); + FROM_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( redFsm->anyEofActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), EA() ); + EOF_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + STATE_IDS(); +} + +void FGotoCodeGen::writeExec() +{ + outLabelUsed = false; + + out << " {\n"; + + if ( redFsm->anyRegCurStateRef() ) + out << " int _ps = 0;\n"; + + if ( redFsm->anyConditions() ) + out << " " << WIDE_ALPH_TYPE() << " _widec;\n"; + + if ( hasEnd ) { + outLabelUsed = true; + out << + " if ( " << P() << " == " << PE() << " )\n" + " goto _out;\n"; + } + + out << "_resume:\n"; + + if ( redFsm->anyFromStateActions() ) { + out << + " switch ( " << FSA() << "[" << CS() << "] ) {\n"; + FROM_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + "\n"; + } + + out << + " switch ( " << CS() << " ) {\n"; + STATE_GOTOS(); + SWITCH_DEFAULT() << + " }\n" + "\n"; + TRANSITIONS() << + "\n"; + + if ( redFsm->anyRegActions() ) + EXEC_ACTIONS() << "\n"; + + out << "_again:\n"; + + if ( redFsm->anyToStateActions() ) { + out << + " switch ( " << TSA() << "[" << CS() << "] ) {\n"; + 
TO_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + "\n"; + } + + if ( hasEnd ) { + out << + " if ( ++" << P() << " != " << PE() << " )\n" + " goto _resume;\n"; + } + else { + out << + " " << P() << " += 1;\n" + " goto _resume;\n"; + } + + + if ( outLabelUsed ) + out << " _out: {}\n"; + + out << " }\n"; +} + +void FGotoCodeGen::writeEOF() +{ + if ( redFsm->anyEofActions() ) { + out << + " {\n" + " switch ( " << EA() << "[" << CS() << "] ) {\n"; + EOF_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + "\n"; + } +} diff --git a/contrib/tools/ragel5/rlgen-cd/fgotocodegen.h b/contrib/tools/ragel5/rlgen-cd/fgotocodegen.h new file mode 100644 index 0000000000..076f5c4f7f --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/fgotocodegen.h @@ -0,0 +1,76 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Erich Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FGOTOCODEGEN_H +#define _FGOTOCODEGEN_H + +#include <iostream> +#include "gotocodegen.h" + +/* Forwards. 
*/ +struct CodeGenData; + + +/* + * class FGotoCodeGen + */ +class FGotoCodeGen : public GotoCodeGen +{ +public: + FGotoCodeGen( ostream &out ) : FsmCodeGen(out), GotoCodeGen(out) {} + + std::ostream &EXEC_ACTIONS(); + std::ostream &TO_STATE_ACTION_SWITCH(); + std::ostream &FROM_STATE_ACTION_SWITCH(); + std::ostream &FINISH_CASES(); + std::ostream &EOF_ACTION_SWITCH(); + unsigned int TO_STATE_ACTION( RedStateAp *state ); + unsigned int FROM_STATE_ACTION( RedStateAp *state ); + unsigned int EOF_ACTION( RedStateAp *state ); + + virtual void writeData(); + virtual void writeEOF(); + virtual void writeExec(); +}; + +/* + * class CFGotoCodeGen + */ +struct CFGotoCodeGen + : public FGotoCodeGen, public CCodeGen +{ + CFGotoCodeGen( ostream &out ) : + FsmCodeGen(out), FGotoCodeGen(out), CCodeGen(out) {} +}; + +/* + * class DFGotoCodeGen + */ +struct DFGotoCodeGen + : public FGotoCodeGen, public DCodeGen +{ + DFGotoCodeGen( ostream &out ) : + FsmCodeGen(out), FGotoCodeGen(out), DCodeGen(out) {} +}; + +#endif /* _FGOTOCODEGEN_H */ diff --git a/contrib/tools/ragel5/rlgen-cd/flatcodegen.cpp b/contrib/tools/ragel5/rlgen-cd/flatcodegen.cpp new file mode 100644 index 0000000000..117f3798c9 --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/flatcodegen.cpp @@ -0,0 +1,766 @@ +/* + * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Erich Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "rlgen-cd.h" +#include "flatcodegen.h" +#include "redfsm.h" +#include "gendata.h" + +std::ostream &FlatCodeGen::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->location+1; + out << act; + return out; +} + +std::ostream &FlatCodeGen::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->location+1; + out << act; + return out; +} + +std::ostream &FlatCodeGen::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->location+1; + out << act; + return out; +} + +std::ostream &FlatCodeGen::TRANS_ACTION( RedTransAp *trans ) +{ + /* If there are actions, emit them. Otherwise emit zero. */ + int act = 0; + if ( trans->action != 0 ) + act = trans->action->location+1; + out << act; + return out; +} + +std::ostream &FlatCodeGen::TO_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numToStateRefs > 0 ) { + /* Write the case label, the action and the case break */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +std::ostream &FlatCodeGen::FROM_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Write out referenced actions. 
*/ + if ( act->numFromStateRefs > 0 ) { + /* Write the case label, the action and the case break */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +std::ostream &FlatCodeGen::EOF_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numEofRefs > 0 ) { + /* Write the case label, the action and the case break */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, true ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + + +std::ostream &FlatCodeGen::ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numTransRefs > 0 ) { + /* Write the case label, the action and the case break */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + + +std::ostream &FlatCodeGen::FLAT_INDEX_OFFSET() +{ + out << "\t"; + int totalStateNum = 0, curIndOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the index offset. */ + out << curIndOffset; + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + + /* Move the index offset ahead. */ + if ( st->transList != 0 ) + curIndOffset += keyOps->span( st->lowKey, st->highKey ); + + if ( st->defTrans != 0 ) + curIndOffset += 1; + } + out << "\n"; + return out; +} + +std::ostream &FlatCodeGen::KEY_SPANS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write singles length. 
*/ + unsigned long long span = 0; + if ( st->transList != 0 ) + span = keyOps->span( st->lowKey, st->highKey ); + out << span; + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +/* Write one to-state action entry per state, comma separated, breaking the + * line after every IALL entries. */ +std::ostream &FlatCodeGen::TO_STATE_ACTIONS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the to-state action entry for this state. */ + TO_STATE_ACTION(st); + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +/* Write one from-state action entry per state, comma separated, breaking the + * line after every IALL entries. */ +std::ostream &FlatCodeGen::FROM_STATE_ACTIONS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the from-state action entry for this state. */ + FROM_STATE_ACTION(st); + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +/* Write one eof action entry per state, comma separated, breaking the line + * after every IALL entries. */ +std::ostream &FlatCodeGen::EOF_ACTIONS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the eof action entry for this state. */ + EOF_ACTION(st); + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +/* Write the cond low key and cond high key of every state, followed by a + * single trailing 0 entry. */ +std::ostream &FlatCodeGen::COND_KEYS() +{ + out << '\t'; + int totalTrans = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Emit just cond low key and cond high key. */ + out << KEY( st->condLowKey ) << ", "; + out << KEY( st->condHighKey ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. */ + out << 0 << "\n"; + return out; +} + +std::ostream &FlatCodeGen::COND_KEY_SPANS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write singles length. 
*/ + unsigned long long span = 0; + if ( st->condList != 0 ) + span = keyOps->span( st->condLowKey, st->condHighKey ); + out << span; + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +std::ostream &FlatCodeGen::CONDS() +{ + int totalTrans = 0; + out << '\t'; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->condList != 0 ) { + /* Walk the singles. */ + unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey ); + for ( unsigned long long pos = 0; pos < span; pos++ ) { + if ( st->condList[pos] != 0 ) + out << st->condList[pos]->condSpaceId + 1 << ", "; + else + out << "0, "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + } + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. */ + out << 0 << "\n"; + return out; +} + +std::ostream &FlatCodeGen::COND_INDEX_OFFSET() +{ + out << "\t"; + int totalStateNum = 0, curIndOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the index offset. */ + out << curIndOffset; + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + + /* Move the index offset ahead. */ + if ( st->condList != 0 ) + curIndOffset += keyOps->span( st->condLowKey, st->condHighKey ); + } + out << "\n"; + return out; +} + + +std::ostream &FlatCodeGen::KEYS() +{ + out << '\t'; + int totalTrans = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Emit just low key and high key. */ + out << KEY( st->lowKey ) << ", "; + out << KEY( st->highKey ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. 
*/ + out << 0 << "\n"; + return out; +} + +std::ostream &FlatCodeGen::INDICIES() +{ + int totalTrans = 0; + out << '\t'; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->transList != 0 ) { + /* Walk the singles. */ + unsigned long long span = keyOps->span( st->lowKey, st->highKey ); + for ( unsigned long long pos = 0; pos < span; pos++ ) { + out << st->transList[pos]->id << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + } + + /* The state's default index goes next. */ + if ( st->defTrans != 0 ) + out << st->defTrans->id << ", "; + + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. */ + out << 0 << "\n"; + return out; +} + +std::ostream &FlatCodeGen::TRANS_TARGS() +{ + /* Transitions must be written ordered by their id. */ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + transPtrs[trans->id] = trans; + + /* Keep a count of the num of items in the array written. */ + out << '\t'; + int totalStates = 0; + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* Write out the target state. */ + RedTransAp *trans = transPtrs[t]; + out << trans->targ->id; + if ( t < redFsm->transSet.length()-1 ) { + out << ", "; + if ( ++totalStates % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + delete[] transPtrs; + return out; +} + + +std::ostream &FlatCodeGen::TRANS_ACTIONS() +{ + /* Transitions must be written ordered by their id. */ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + transPtrs[trans->id] = trans; + + /* Keep a count of the num of items in the array written. */ + out << '\t'; + int totalAct = 0; + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* Write the function for the transition. 
*/ + RedTransAp *trans = transPtrs[t]; + TRANS_ACTION( trans ); + if ( t < redFsm->transSet.length()-1 ) { + out << ", "; + if ( ++totalAct % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + delete[] transPtrs; + return out; +} + +void FlatCodeGen::LOCATE_TRANS() +{ + out << + " _keys = " << ARR_OFF( K(), "(" + CS() + "<<1)" ) << ";\n" + " _inds = " << ARR_OFF( I(), IO() + "[" + CS() + "]" ) << ";\n" + "\n" + " _slen = " << SP() << "[" << CS() << "];\n" + " _trans = _inds[ _slen > 0 && _keys[0] <=" << GET_WIDE_KEY() << " &&\n" + " " << GET_WIDE_KEY() << " <= _keys[1] ?\n" + " " << GET_WIDE_KEY() << " - _keys[0] : _slen ];\n" + "\n"; +} + +void FlatCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << "{" << CS() << " = " << gotoDest << "; " << + CTRL_FLOW() << "goto _again;}"; +} + +void FlatCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) +{ + ret << "{" << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish ); + ret << "); " << CTRL_FLOW() << "goto _again;}"; +} + +void FlatCodeGen::CURS( ostream &ret, bool inFinish ) +{ + ret << "(_ps)"; +} + +void FlatCodeGen::TARGS( ostream &ret, bool inFinish, int targState ) +{ + ret << "(" << CS() << ")"; +} + +void FlatCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish ) +{ + ret << CS() << " = " << nextDest << ";"; +} + +void FlatCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) +{ + ret << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish ); + ret << ");"; +} + +void FlatCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; " << CS() << " = " << + callDest << "; " << CTRL_FLOW() << "goto _again;}"; +} + + +void FlatCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish ) +{ + ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; " << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, targState, 
inFinish ); + ret << "); " << CTRL_FLOW() << "goto _again;}"; +} + + +void FlatCodeGen::RET( ostream &ret, bool inFinish ) +{ + ret << "{" << CS() << " = " << STACK() << "[--" << TOP() << "]; " << + CTRL_FLOW() << "goto _again;}"; +} + +void FlatCodeGen::BREAK( ostream &ret, int targState ) +{ + outLabelUsed = true; + ret << CTRL_FLOW() << "goto _out;"; +} + +void FlatCodeGen::writeData() +{ + /* If there are any transition functions then output the array. If there + * are none, don't bother emitting an empty array that won't be used. */ + if ( redFsm->anyActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActArrItem), A() ); + ACTIONS_ARRAY(); + CLOSE_ARRAY() << + "\n"; + } + + if ( redFsm->anyConditions() ) { + OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() ); + COND_KEYS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondSpan), CSP() ); + COND_KEY_SPANS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCond), C() ); + CONDS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondIndexOffset), CO() ); + COND_INDEX_OFFSET(); + CLOSE_ARRAY() << + "\n"; + } + + OPEN_ARRAY( WIDE_ALPH_TYPE(), K() ); + KEYS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxSpan), SP() ); + KEY_SPANS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxFlatIndexOffset), IO() ); + FLAT_INDEX_OFFSET(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxIndex), I() ); + INDICIES(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxState), TT() ); + TRANS_TARGS(); + CLOSE_ARRAY() << + "\n"; + + if ( redFsm->anyActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TA() ); + TRANS_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( redFsm->anyToStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TSA() ); + TO_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( redFsm->anyFromStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), FSA() ); + 
FROM_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( redFsm->anyEofActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), EA() ); + EOF_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + STATE_IDS(); +} + +void FlatCodeGen::COND_TRANSLATE() +{ + out << + " _widec = " << GET_KEY() << ";\n"; + + out << + " _keys = " << ARR_OFF( CK(), "(" + CS() + "<<1)" ) << ";\n" + " _conds = " << ARR_OFF( C(), CO() + "[" + CS() + "]" ) << ";\n" + "\n" + " _slen = " << CSP() << "[" << CS() << "];\n" + " _cond = _slen > 0 && _keys[0] <=" << GET_WIDE_KEY() << " &&\n" + " " << GET_WIDE_KEY() << " <= _keys[1] ?\n" + " _conds[" << GET_WIDE_KEY() << " - _keys[0]] : 0;\n" + "\n"; + + out << + " switch ( _cond ) {\n"; + for ( CondSpaceList::Iter csi = condSpaceList; csi.lte(); csi++ ) { + CondSpace *condSpace = csi; + out << " case " << condSpace->condSpaceId + 1 << ": {\n"; + out << TABS(2) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(" << + KEY(condSpace->baseKey) << " + (" << GET_KEY() << + " - " << KEY(keyOps->minKey) << "));\n"; + + for ( CondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << TABS(2) << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = ((1 << csi.pos()) * keyOps->alphSize()); + out << " ) _widec += " << condValOffset << ";\n"; + } + + out << " }\n"; + out << " break;\n"; + } + + SWITCH_DEFAULT(); + + out << + " }\n"; +} + +void FlatCodeGen::writeExec() +{ + outLabelUsed = false; + + out << + " {\n" + " int _slen"; + + if ( redFsm->anyRegCurStateRef() ) + out << ", _ps"; + + out << + ";\n" + " int _trans"; + + if ( redFsm->anyConditions() ) + out << ", _cond"; + out << ";\n"; + + if ( redFsm->anyToStateActions() || + redFsm->anyRegActions() || redFsm->anyFromStateActions() ) + { + out << + " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxActArrItem) << POINTER() << "_acts;\n" + " " << UINT() << " _nacts;\n"; + } + + out << + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n" + " " << PTR_CONST() << 
ARRAY_TYPE(redFsm->maxIndex) << POINTER() << "_inds;\n"; + + if ( redFsm->anyConditions() ) { + out << + " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxCond) << POINTER() << "_conds;\n" + " " << WIDE_ALPH_TYPE() << " _widec;\n"; + } + + out << "\n"; + + if ( hasEnd ) { + outLabelUsed = true; + out << + " if ( " << P() << " == " << PE() << " )\n" + " goto _out;\n"; + } + + out << "_resume:\n"; + + if ( redFsm->errState != 0 ) { + outLabelUsed = true; + out << + " if ( " << CS() << " == " << redFsm->errState->id << " )\n" + " goto _out;\n"; + } + + if ( redFsm->anyFromStateActions() ) { + out << + " _acts = " << ARR_OFF( A(), FSA() + "[" + CS() + "]" ) << ";\n" + " _nacts = " << CAST(UINT()) << " *_acts++;\n" + " while ( _nacts-- > 0 ) {\n" + " switch ( *_acts++ ) {\n"; + FROM_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + "\n"; + } + + if ( redFsm->anyConditions() ) + COND_TRANSLATE(); + + LOCATE_TRANS(); + + if ( redFsm->anyRegCurStateRef() ) + out << " _ps = " << CS() << ";\n"; + + out << + " " << CS() << " = " << TT() << "[_trans];\n" + "\n"; + + if ( redFsm->anyRegActions() ) { + out << + " if ( " << TA() << "[_trans] == 0 )\n" + " goto _again;\n" + "\n" + " _acts = " << ARR_OFF( A(), TA() + "[_trans]" ) << ";\n" + " _nacts = " << CAST(UINT()) << " *_acts++;\n" + " while ( _nacts-- > 0 ) {\n" + " switch ( *(_acts++) )\n {\n"; + ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + "\n"; + } + + if ( redFsm->anyRegActions() || redFsm->anyActionGotos() || + redFsm->anyActionCalls() || redFsm->anyActionRets() ) + out << "_again:\n"; + + if ( redFsm->anyToStateActions() ) { + out << + " _acts = " << ARR_OFF( A(), TSA() + "[" + CS() + "]" ) << ";\n" + " _nacts = " << CAST(UINT()) << " *_acts++;\n" + " while ( _nacts-- > 0 ) {\n" + " switch ( *_acts++ ) {\n"; + TO_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + "\n"; + } + + if ( hasEnd ) { + out << + " if ( ++" << P() << " != " << PE() << " )\n" + " goto _resume;\n"; + } + else { 
+ out << + " " << P() << " += 1;\n" + " goto _resume;\n"; + } + + if ( outLabelUsed ) + out << " _out: {}\n"; + + out << " }\n"; +} + +void FlatCodeGen::writeEOF() +{ + if ( redFsm->anyEofActions() ) { + out << + " {\n" + " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxActArrItem) << POINTER() << "_acts = " << + ARR_OFF( A(), EA() + "[" + CS() + "]" ) << ";\n" + " " << UINT() << " _nacts = " << CAST(UINT()) << " *_acts++;\n" + " while ( _nacts-- > 0 ) {\n" + " switch ( *_acts++ ) {\n"; + EOF_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + " }\n" + "\n"; + } +} diff --git a/contrib/tools/ragel5/rlgen-cd/flatcodegen.h b/contrib/tools/ragel5/rlgen-cd/flatcodegen.h new file mode 100644 index 0000000000..27dee2ef92 --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/flatcodegen.h @@ -0,0 +1,108 @@ +/* + * Copyright 2004-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Erich Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FLATCODEGEN_H +#define _FLATCODEGEN_H + +#include <iostream> +#include "fsmcodegen.h" + +/* Forwards. 
*/ +struct CodeGenData; +struct NameInst; +struct RedTransAp; +struct RedStateAp; + +/* + * FlatCodeGen + */ +class FlatCodeGen : virtual public FsmCodeGen +{ +public: + FlatCodeGen( ostream &out ) : FsmCodeGen(out) {} + virtual ~FlatCodeGen() { } + +protected: + std::ostream &TO_STATE_ACTION_SWITCH(); + std::ostream &FROM_STATE_ACTION_SWITCH(); + std::ostream &EOF_ACTION_SWITCH(); + std::ostream &ACTION_SWITCH(); + std::ostream &KEYS(); + std::ostream &INDICIES(); + std::ostream &FLAT_INDEX_OFFSET(); + std::ostream &KEY_SPANS(); + std::ostream &TO_STATE_ACTIONS(); + std::ostream &FROM_STATE_ACTIONS(); + std::ostream &EOF_ACTIONS(); + std::ostream &TRANS_TARGS(); + std::ostream &TRANS_ACTIONS(); + void LOCATE_TRANS(); + + std::ostream &COND_INDEX_OFFSET(); + void COND_TRANSLATE(); + std::ostream &CONDS(); + std::ostream &COND_KEYS(); + std::ostream &COND_KEY_SPANS(); + + void GOTO( ostream &ret, int gotoDest, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NEXT( ostream &ret, int nextDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ); + void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ); + void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish ); + void CURS( ostream &ret, bool inFinish ); + void TARGS( ostream &ret, bool inFinish, int targState ); + void RET( ostream &ret, bool inFinish ); + void BREAK( ostream &ret, int targState ); + + virtual std::ostream &TO_STATE_ACTION( RedStateAp *state ); + virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state ); + virtual std::ostream &EOF_ACTION( RedStateAp *state ); + virtual std::ostream &TRANS_ACTION( RedTransAp *trans ); + + virtual void writeData(); + virtual void writeEOF(); + virtual void writeExec(); +}; + +/* + * CFlatCodeGen + */ +struct CFlatCodeGen + : public FlatCodeGen, public CCodeGen +{ + CFlatCodeGen( ostream &out ) : + FsmCodeGen(out), FlatCodeGen(out), CCodeGen(out) {} 
+}; + +/* + * DFlatCodeGen + */ +struct DFlatCodeGen + : public FlatCodeGen, public DCodeGen +{ + DFlatCodeGen( ostream &out ) : + FsmCodeGen(out), FlatCodeGen(out), DCodeGen(out) {} +}; + +#endif /* _FLATCODEGEN_H */ diff --git a/contrib/tools/ragel5/rlgen-cd/fsmcodegen.cpp b/contrib/tools/ragel5/rlgen-cd/fsmcodegen.cpp new file mode 100644 index 0000000000..c0fc4b00f5 --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/fsmcodegen.cpp @@ -0,0 +1,749 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Erich Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "rlgen-cd.h" +#include "fsmcodegen.h" +#include "redfsm.h" +#include "gendata.h" +#include <sstream> +#include <string> +#include <assert.h> + + +using std::ostream; +using std::ostringstream; +using std::string; +using std::cerr; +using std::endl; + +void lineDirective( ostream &out, char *fileName, int line ) +{ + if ( noLineDirectives ) + out << "/* "; + + /* Write the preprocessor line info for to the input file. 
*/ + out << "#line " << line << " \""; + for ( char *pc = fileName; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else + out << *pc; + } + out << '"'; + + if ( noLineDirectives ) + out << " */"; + + out << '\n'; +} + +void genLineDirective( ostream &out ) +{ + std::streambuf *sbuf = out.rdbuf(); + output_filter *filter = static_cast<output_filter*>(sbuf); + lineDirective( out, filter->fileName, filter->line + 1 ); +} + + +/* Init code gen with in parameters. */ +FsmCodeGen::FsmCodeGen( ostream &out ) +: + CodeGenData(out) +{ +} + +unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal ) +{ + long long maxValLL = (long long) maxVal; + HostType *arrayType = keyOps->typeSubsumes( maxValLL ); + assert( arrayType != 0 ); + return arrayType->size; +} + +string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal ) +{ + long long maxValLL = (long long) maxVal; + HostType *arrayType = keyOps->typeSubsumes( maxValLL ); + assert( arrayType != 0 ); + + string ret = arrayType->data1; + if ( arrayType->data2 != 0 ) { + ret += " "; + ret += arrayType->data2; + } + return ret; +} + + +/* Write out the fsm name. */ +string FsmCodeGen::FSM_NAME() +{ + return fsmName; +} + +/* Emit the offset of the start state as a decimal integer. */ +string FsmCodeGen::START_STATE_ID() +{ + ostringstream ret; + ret << redFsm->startState->id; + return ret.str(); +}; + +/* Write out the array of actions. */ +std::ostream &FsmCodeGen::ACTIONS_ARRAY() +{ + out << "\t0, "; + int totalActions = 1; + for ( ActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Write out the length, which will never be the last character. */ + out << act->key.length() << ", "; + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + + for ( ActionTable::Iter item = act->key; item.lte(); item++ ) { + out << item->value->actionId; + if ( ! 
(act.last() && item.last()) ) + out << ", "; + + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + + +/* Build the expression used to access the current state. */ +string FsmCodeGen::CS() +{ + ostringstream ret; + if ( curStateExpr != 0 ) { + /* Emit the user supplied current state expression, parenthesized. */ + ret << "("; + INLINE_LIST( ret, curStateExpr, 0, false ); + ret << ")"; + } + else { + /* No user expression: use the cs variable behind the access prefix. */ + ret << ACCESS() << "cs"; + } + return ret.str(); +} + +/* Build the access prefix from accessExpr; empty when none was given. */ +string FsmCodeGen::ACCESS() +{ + ostringstream ret; + if ( accessExpr != 0 ) + INLINE_LIST( ret, accessExpr, 0, false ); + return ret.str(); +} + +/* Key expression for the whole machine: _widec when the machine has any + * conditions, the plain key otherwise. */ +string FsmCodeGen::GET_WIDE_KEY() +{ + if ( redFsm->anyConditions() ) + return "_widec"; + else + return GET_KEY(); +} + +/* Key expression for a single state: _widec when the state's condition list + * is non-empty, the plain key otherwise. */ +string FsmCodeGen::GET_WIDE_KEY( RedStateAp *state ) +{ + if ( state->stateCondList.length() > 0 ) + return "_widec"; + else + return GET_KEY(); +} + +/* Build the expression used to retrieve the current key. */ +string FsmCodeGen::GET_KEY() +{ + ostringstream ret; + if ( getKeyExpr != 0 ) { + /* Emit the user supplied method of retrieving the key. */ + ret << "("; + INLINE_LIST( ret, getKeyExpr, 0, false ); + ret << ")"; + } + else { + /* Expression for retrieving the key, use simple dereference. */ + ret << "(*" << P() << ")"; + } + return ret.str(); +} + +/* Build a string of level tab characters. Makes the nested binary search + * nice looking. */ +string FsmCodeGen::TABS( int level ) +{ + string result; + while ( level-- > 0 ) + result += "\t"; + return result; +} + +/* Write out a key from the fsm code gen. Depends on whether or not the key is + * signed: the plain value is written when keys are signed or the host + * language has no explicit unsigned, otherwise the value is cast to unsigned + * long and given a 'u' suffix. */ +string FsmCodeGen::KEY( Key key ) +{ + ostringstream ret; + if ( keyOps->isSigned || !hostLang->explicitUnsigned ) + ret << key.getVal(); + else + ret << (unsigned long) key.getVal() << 'u'; + return ret.str(); +} + +void FsmCodeGen::EXEC( ostream &ret, InlineItem *item, int targState, int inFinish ) +{ + /* The parser gives fexec two children. The double brackets are for D + * code. 
If the inline list is a single word it will get interpreted as a + * C-style cast by the D compiler. */ + ret << "{" << P() << " = (("; + INLINE_LIST( ret, item->children, targState, inFinish ); + ret << "))-1;}"; +} + +void FsmCodeGen::EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish ) +{ + /* Tokend version of exec. */ + + /* The parser gives fexec two children. The double brackets are for D + * code. If the inline list is a single word it will get interpreted as a + * C-style cast by the D compiler. */ + ret << "{" << TOKEND() << " = (("; + INLINE_LIST( ret, item->children, targState, inFinish ); + ret << "));}"; +} + + +void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item, + int targState, int inFinish ) +{ + ret << + " switch( " << ACT() << " ) {\n"; + + /* If the switch handles error then we also forced the error state. It + * will exist. */ + if ( item->handlesError ) { + ret << " case 0: " << TOKEND() << " = " << TOKSTART() << "; "; + GOTO( ret, redFsm->errState->id, inFinish ); + ret << "\n"; + } + + for ( InlineList::Iter lma = *item->children; lma.lte(); lma++ ) { + /* Write the case label, the action and the case break. */ + ret << " case " << lma->lmId << ":\n"; + + /* Write the block and close it off. */ + ret << " {"; + INLINE_LIST( ret, lma->children, targState, inFinish ); + ret << "}\n"; + + ret << " break;\n"; + } + /* Default required for D code. */ + ret << + " default: break;\n" + " }\n" + "\t"; +} + +void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item ) +{ + ret << ACT() << " = " << item->lmId << ";"; +} + +void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item ) +{ + /* The tokend action sets tokend. 
*/ + /* Write every piece to ret, the stream supplied by the caller. INLINE_LIST is also invoked with string streams (see CS() and GET_KEY()), so the offset and the closing semicolon must not go to out. */ + ret << TOKEND() << " = " << P(); + if ( item->offset != 0 ) + ret << "+" << item->offset; + ret << ";"; +} + +void FsmCodeGen::GET_TOKEND( ostream &ret, InlineItem *item ) +{ + ret << TOKEND(); +} + +void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item ) +{ + ret << TOKSTART() << " = " << NULL_ITEM() << ";"; +} + +void FsmCodeGen::INIT_ACT( ostream &ret, InlineItem *item ) +{ + ret << ACT() << " = 0;"; +} + +void FsmCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item ) +{ + ret << TOKSTART() << " = " << P() << ";"; +} + +void FsmCodeGen::SUB_ACTION( ostream &ret, InlineItem *item, + int targState, bool inFinish ) +{ + if ( item->children->length() > 0 ) { + /* Write the block and close it off. */ + ret << "{"; + INLINE_LIST( ret, item->children, targState, inFinish ); + ret << "}"; + } +} + + +/* Write out an inline tree structure. Walks the list and possibly calls out + * to virtual functions that handle language specific items in the tree. */ +void FsmCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList, + int targState, bool inFinish ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Text: + ret << item->data; + break; + case InlineItem::Goto: + GOTO( ret, item->targState->id, inFinish ); + break; + case InlineItem::Call: + CALL( ret, item->targState->id, targState, inFinish ); + break; + case InlineItem::Next: + NEXT( ret, item->targState->id, inFinish ); + break; + case InlineItem::Ret: + RET( ret, inFinish ); + break; + case InlineItem::PChar: + ret << P(); + break; + case InlineItem::Char: + ret << GET_KEY(); + break; + case InlineItem::Hold: + ret << P() << "--;"; + break; + case InlineItem::Exec: + EXEC( ret, item, targState, inFinish ); + break; + case InlineItem::HoldTE: + ret << TOKEND() << "--;"; + break; + case InlineItem::ExecTE: + EXECTE( ret, item, targState, inFinish ); + break; + case InlineItem::Curs: + CURS( ret, inFinish ); + break; + case 
InlineItem::Targs: + TARGS( ret, inFinish, targState ); + break; + case InlineItem::Entry: + ret << item->targState->id; + break; + case InlineItem::GotoExpr: + GOTO_EXPR( ret, item, inFinish ); + break; + case InlineItem::CallExpr: + CALL_EXPR( ret, item, targState, inFinish ); + break; + case InlineItem::NextExpr: + NEXT_EXPR( ret, item, inFinish ); + break; + case InlineItem::LmSwitch: + LM_SWITCH( ret, item, targState, inFinish ); + break; + case InlineItem::LmSetActId: + SET_ACT( ret, item ); + break; + case InlineItem::LmSetTokEnd: + SET_TOKEND( ret, item ); + break; + case InlineItem::LmGetTokEnd: + GET_TOKEND( ret, item ); + break; + case InlineItem::LmInitTokStart: + INIT_TOKSTART( ret, item ); + break; + case InlineItem::LmInitAct: + INIT_ACT( ret, item ); + break; + case InlineItem::LmSetTokStart: + SET_TOKSTART( ret, item ); + break; + case InlineItem::SubAction: + SUB_ACTION( ret, item, targState, inFinish ); + break; + case InlineItem::Break: + BREAK( ret, targState ); + break; + } + } +} +/* Write out paths in line directives. Escapes any special characters. */ +string FsmCodeGen::LDIR_PATH( char *path ) +{ + ostringstream ret; + for ( char *pc = path; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + ret << "\\\\"; + else + ret << *pc; + } + return ret.str(); +} + +void FsmCodeGen::ACTION( ostream &ret, Action *action, int targState, bool inFinish ) +{ + /* Write the preprocessor line info for going into the source file. */ + lineDirective( ret, sourceFileName, action->loc.line ); + + /* Write the block and close it off. 
*/ + ret << "\t{"; + INLINE_LIST( ret, action->inlineList, targState, inFinish ); + ret << "}\n"; +} + +void FsmCodeGen::CONDITION( ostream &ret, Action *condition ) +{ + ret << "\n"; + lineDirective( ret, sourceFileName, condition->loc.line ); + INLINE_LIST( ret, condition->inlineList, 0, false ); +} + +string FsmCodeGen::ERROR_STATE() +{ + ostringstream ret; + if ( redFsm->errState != 0 ) + ret << redFsm->errState->id; + else + ret << "-1"; + return ret.str(); +} + +string FsmCodeGen::FIRST_FINAL_STATE() +{ + ostringstream ret; + if ( redFsm->firstFinState != 0 ) + ret << redFsm->firstFinState->id; + else + ret << redFsm->nextStateId; + return ret.str(); +} + +void FsmCodeGen::writeInit() +{ + out << " {\n"; + + if ( redFsm->startState != 0 ) + out << "\t" << CS() << " = " << START() << ";\n"; + + /* If there are any calls, then the stack top needs initialization. */ + if ( redFsm->anyActionCalls() || redFsm->anyActionRets() ) + out << "\t" << TOP() << " = 0;\n"; + + if ( hasLongestMatch ) { + out << + " " << TOKSTART() << " = " << NULL_ITEM() << ";\n" + " " << TOKEND() << " = " << NULL_ITEM() << ";\n" + " " << ACT() << " = 0;\n"; + } + out << " }\n"; +} + +string FsmCodeGen::DATA_PREFIX() +{ + if ( dataPrefix ) + return FSM_NAME() + "_"; + return ""; +} + +/* Emit the alphabet data type. */ +string FsmCodeGen::ALPH_TYPE() +{ + string ret = keyOps->alphType->data1; + if ( keyOps->alphType->data2 != 0 ) { + ret += " "; + ret += + keyOps->alphType->data2; + } + return ret; +} + +/* Emit the alphabet data type. 
*/ +string FsmCodeGen::WIDE_ALPH_TYPE() +{ /* Reuses ALPH_TYPE() while redFsm->maxKey fits within keyOps->maxKey; otherwise builds the type name from the host type that keyOps->typeSubsumes() returns for that key. */ + string ret; + if ( redFsm->maxKey <= keyOps->maxKey ) + ret = ALPH_TYPE(); + else { + long long maxKeyVal = redFsm->maxKey.getLongLong(); + HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal ); + assert( wideType != 0 ); + + ret = wideType->data1; + if ( wideType->data2 != 0 ) { + ret += " "; + ret += wideType->data2; + } + } + return ret; +} + +void FsmCodeGen::STATE_IDS() +{ /* Emits the start, first-final, error and entry-point state ids as named constants through STATIC_VAR. NOTE(review): every constant is closed with `};`, which pairs with the `enum {` opener written by CCodeGen::STATIC_VAR. DCodeGen::STATIC_VAR writes `static const <type> <name>` with no opening brace, so the D output looks unbalanced here -- confirm, and fix STATIC_VAR and STATE_IDS together. */ + if ( redFsm->startState != 0 ) + STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << "};\n"; + + if ( writeFirstFinal ) + STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << "};\n"; + + if ( writeErr ) + STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << "};\n"; + + out << "\n"; + + if ( entryPointNames.length() > 0 ) { + for ( EntryNameVect::Iter en = entryPointNames; en.lte(); en++ ) { + STATIC_VAR( "int", DATA_PREFIX() + "en_" + *en ) << + " = " << entryPointIds[en.pos()] << "};\n"; + } + out << "\n"; + } +} + + +/* + * Language specific, but style independent code generator functions. 
+ */ + +string CCodeGen::PTR_CONST() +{ + return "const "; +} + +std::ostream &CCodeGen::OPEN_ARRAY( const string& type, const string& name ) +{ + out << "#if defined(__GNUC__)\n"; + out << "static __attribute__((used)) const " << type << " " << name << "[] = {\n"; + out << "#else\n"; + out << "static const " << type << " " << name << "[] = {\n"; + out << "#endif\n"; + return out; +} + +std::ostream &CCodeGen::CLOSE_ARRAY() +{ + return out << "};\n"; +} + +std::ostream &CCodeGen::STATIC_VAR( const string& type, const string& name ) +{ + out << "enum {" << name; + return out; +} + +string CCodeGen::UINT( ) +{ + return "unsigned int"; +} + +string CCodeGen::ARR_OFF( const string& ptr, const string& offset ) +{ + return ptr + " + " + offset; +} + +string CCodeGen::CAST( const string& type ) +{ + return "(" + type + ")"; +} + +string CCodeGen::NULL_ITEM() +{ + return "0"; +} + +string CCodeGen::POINTER() +{ + return " *"; +} + +std::ostream &CCodeGen::SWITCH_DEFAULT() +{ + return out; +} + +string CCodeGen::CTRL_FLOW() +{ + return ""; +} + +void CCodeGen::writeExports() +{ + if ( exportList.length() > 0 ) { + for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) { + out << "#define " << DATA_PREFIX() << "ex_" << ex->name << " " << + KEY(ex->key) << "\n"; + } + out << "\n"; + } +} + +/* + * D Specific + */ + +string DCodeGen::NULL_ITEM() +{ + return "null"; +} + +string DCodeGen::POINTER() +{ + // multiple items seperated by commas can also be pointer types. 
+ return "* "; +} + +string DCodeGen::PTR_CONST() +{ + return ""; +} + +std::ostream &DCodeGen::OPEN_ARRAY( const string& type, const string& name ) +{ + out << "static const " << type << "[] " << name << " = [\n"; + return out; +} + +std::ostream &DCodeGen::CLOSE_ARRAY() +{ + return out << "];\n"; +} + +std::ostream &DCodeGen::STATIC_VAR( const string& type, const string& name ) +{ + out << "static const " << type << " " << name; + return out; +} + +string DCodeGen::ARR_OFF( const string& ptr, const string& offset ) +{ + return "&" + ptr + "[" + offset + "]"; +} + +string DCodeGen::CAST( const string& type ) +{ + return "cast(" + type + ")"; +} + +string DCodeGen::UINT( ) +{ + return "uint"; +} + +std::ostream &DCodeGen::SWITCH_DEFAULT() +{ + out << " default: break;\n"; + return out; +} + +string DCodeGen::CTRL_FLOW() +{ + return "if (true) "; +} + +void DCodeGen::writeExports() +{ + if ( exportList.length() > 0 ) { + for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) { + out << "static const " << ALPH_TYPE() << " " << DATA_PREFIX() << + "ex_" << ex->name << " = " << KEY(ex->key) << ";\n"; + } + out << "\n"; + } +} + +/* + * End D-specific code. + */ + +void FsmCodeGen::finishRagelDef() +{ + if ( codeStyle == GenGoto || codeStyle == GenFGoto || + codeStyle == GenIpGoto || codeStyle == GenSplit ) + { + /* For directly executable machines there is no required state + * ordering. Choose a depth-first ordering to increase the + * potential for fall-throughs. */ + redFsm->depthFirstOrdering(); + } + else { + /* The frontend will do this for us, but it may be a good idea to + * force it if the intermediate file is edited. */ + redFsm->sortByStateId(); + } + + /* Choose default transitions and the single transition. */ + redFsm->chooseDefaultSpan(); + + /* Maybe do flat expand, otherwise choose single. 
*/ + if ( codeStyle == GenFlat || codeStyle == GenFFlat ) + redFsm->makeFlat(); + else + redFsm->chooseSingle(); + + /* If any errors have occured in the input file then don't write anything. */ + if ( gblErrorCount > 0 ) + return; + + if ( codeStyle == GenSplit ) + redFsm->partitionFsm( numSplitPartitions ); + + if ( codeStyle == GenIpGoto || codeStyle == GenSplit ) + redFsm->setInTrans(); + + /* Anlayze Machine will find the final action reference counts, among + * other things. We will use these in reporting the usage + * of fsm directives in action code. */ + analyzeMachine(); + + /* Determine if we should use indicies. */ + calcIndexSize(); +} + +ostream &FsmCodeGen::source_warning( const InputLoc &loc ) +{ + cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: "; + return cerr; +} + +ostream &FsmCodeGen::source_error( const InputLoc &loc ) +{ + gblErrorCount += 1; + assert( sourceFileName != 0 ); + cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": "; + return cerr; +} + diff --git a/contrib/tools/ragel5/rlgen-cd/fsmcodegen.h b/contrib/tools/ragel5/rlgen-cd/fsmcodegen.h new file mode 100644 index 0000000000..77c76f1b1a --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/fsmcodegen.h @@ -0,0 +1,218 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Erich Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FSMCODEGEN_H +#define _FSMCODEGEN_H + +#include <iostream> +#include <string> +#include <stdio.h> +#include "common.h" +#include "gendata.h" + +using std::string; +using std::ostream; + +/* Integer array line length. */ +#define IALL 8 + +/* Forwards. */ +struct RedFsmAp; +struct RedStateAp; +struct CodeGenData; +struct Action; +struct NameInst; +struct InlineItem; +struct InlineList; +struct RedAction; +struct LongestMatch; +struct LongestMatchPart; + +inline string itoa( int i ) +{ + char buf[16]; + sprintf( buf, "%i", i ); + return buf; +} + +/* + * class FsmCodeGen + */ +class FsmCodeGen : public CodeGenData +{ +public: + FsmCodeGen( ostream &out ); + virtual ~FsmCodeGen() {} + + virtual void finishRagelDef(); + virtual void writeInit(); + +protected: + string FSM_NAME(); + string START_STATE_ID(); + ostream &ACTIONS_ARRAY(); + string GET_WIDE_KEY(); + string GET_WIDE_KEY( RedStateAp *state ); + string TABS( int level ); + string KEY( Key key ); + string LDIR_PATH( char *path ); + void ACTION( ostream &ret, Action *action, int targState, bool inFinish ); + void CONDITION( ostream &ret, Action *condition ); + string ALPH_TYPE(); + string WIDE_ALPH_TYPE(); + string ARRAY_TYPE( unsigned long maxVal ); + + virtual string ARR_OFF( const string& ptr, const string& offset ) = 0; + virtual string CAST( const string& type ) = 0; + virtual string UINT() = 0; + virtual string NULL_ITEM() = 0; + virtual string POINTER() = 0; + virtual string GET_KEY(); + virtual ostream &SWITCH_DEFAULT() = 0; + + string P() { return "p"; } + string PE() { return "pe"; } + + string ACCESS(); + string CS(); + string STACK() { return ACCESS() + "stack"; } + string TOP() { return ACCESS() + "top"; } + string TOKSTART() { return ACCESS() + "tokstart"; } + string TOKEND() { 
return ACCESS() + "tokend"; } + string ACT() { return ACCESS() + "act"; } + + string DATA_PREFIX(); + string PM() { return "_" + DATA_PREFIX() + "partition_map"; } + string C() { return "_" + DATA_PREFIX() + "cond_spaces"; } + string CK() { return "_" + DATA_PREFIX() + "cond_keys"; } + string K() { return "_" + DATA_PREFIX() + "trans_keys"; } + string I() { return "_" + DATA_PREFIX() + "indicies"; } + string CO() { return "_" + DATA_PREFIX() + "cond_offsets"; } + string KO() { return "_" + DATA_PREFIX() + "key_offsets"; } + string IO() { return "_" + DATA_PREFIX() + "index_offsets"; } + string CL() { return "_" + DATA_PREFIX() + "cond_lengths"; } + string SL() { return "_" + DATA_PREFIX() + "single_lengths"; } + string RL() { return "_" + DATA_PREFIX() + "range_lengths"; } + string A() { return "_" + DATA_PREFIX() + "actions"; } + string TA() { return "_" + DATA_PREFIX() + "trans_actions_wi"; } + string TT() { return "_" + DATA_PREFIX() + "trans_targs_wi"; } + string TSA() { return "_" + DATA_PREFIX() + "to_state_actions"; } + string FSA() { return "_" + DATA_PREFIX() + "from_state_actions"; } + string EA() { return "_" + DATA_PREFIX() + "eof_actions"; } + string SP() { return "_" + DATA_PREFIX() + "key_spans"; } + string CSP() { return "_" + DATA_PREFIX() + "cond_key_spans"; } + string START() { return DATA_PREFIX() + "start"; } + string ERROR() { return DATA_PREFIX() + "error"; } + string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; } + string CTXDATA() { return DATA_PREFIX() + "ctxdata"; } + + void INLINE_LIST( ostream &ret, InlineList *inlineList, int targState, bool inFinish ); + virtual void GOTO( ostream &ret, int gotoDest, bool inFinish ) = 0; + virtual void CALL( ostream &ret, int callDest, int targState, bool inFinish ) = 0; + virtual void NEXT( ostream &ret, int nextDest, bool inFinish ) = 0; + virtual void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) = 0; + virtual void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish 
) = 0; + virtual void CALL_EXPR( ostream &ret, InlineItem *ilItem, + int targState, bool inFinish ) = 0; + virtual void RET( ostream &ret, bool inFinish ) = 0; + virtual void BREAK( ostream &ret, int targState ) = 0; + virtual void CURS( ostream &ret, bool inFinish ) = 0; + virtual void TARGS( ostream &ret, bool inFinish, int targState ) = 0; + void EXEC( ostream &ret, InlineItem *item, int targState, int inFinish ); + void EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish ); + void LM_SWITCH( ostream &ret, InlineItem *item, int targState, int inFinish ); + void SET_ACT( ostream &ret, InlineItem *item ); + void INIT_TOKSTART( ostream &ret, InlineItem *item ); + void INIT_ACT( ostream &ret, InlineItem *item ); + void SET_TOKSTART( ostream &ret, InlineItem *item ); + void SET_TOKEND( ostream &ret, InlineItem *item ); + void GET_TOKEND( ostream &ret, InlineItem *item ); + void SUB_ACTION( ostream &ret, InlineItem *item, + int targState, bool inFinish ); + void STATE_IDS(); + + string ERROR_STATE(); + string FIRST_FINAL_STATE(); + + virtual string PTR_CONST() = 0; + virtual ostream &OPEN_ARRAY( const string& type, const string& name ) = 0; + virtual ostream &CLOSE_ARRAY() = 0; + virtual ostream &STATIC_VAR( const string& type, const string& name ) = 0; + + virtual string CTRL_FLOW() = 0; + + ostream &source_warning(const InputLoc &loc); + ostream &source_error(const InputLoc &loc); + + unsigned int arrayTypeSize( unsigned long maxVal ); + + bool outLabelUsed; + bool againLabelUsed; + bool useIndicies; + +public: + /* Determine if we should use indicies. 
*/ + virtual void calcIndexSize() {} +}; + +class CCodeGen : virtual public FsmCodeGen +{ +public: + CCodeGen( ostream &out ) : FsmCodeGen(out) {} + + virtual string NULL_ITEM(); + virtual string POINTER(); + virtual ostream &SWITCH_DEFAULT(); + virtual ostream &OPEN_ARRAY( const string& type, const string& name ); + virtual ostream &CLOSE_ARRAY(); + virtual ostream &STATIC_VAR( const string& type, const string& name ); + virtual string ARR_OFF( const string& ptr, const string& offset ); + virtual string CAST( const string& type ); + virtual string UINT(); + virtual string PTR_CONST(); + virtual string CTRL_FLOW(); + + virtual void writeExports(); +}; + +class DCodeGen : virtual public FsmCodeGen +{ +public: + DCodeGen( ostream &out ) : FsmCodeGen(out) {} + + virtual string NULL_ITEM(); + virtual string POINTER(); + virtual ostream &SWITCH_DEFAULT(); + virtual ostream &OPEN_ARRAY( const string& type, const string& name ); + virtual ostream &CLOSE_ARRAY(); + virtual ostream &STATIC_VAR( const string& type, const string& name ); + virtual string ARR_OFF( const string& ptr, const string& offset ); + virtual string CAST( const string& type ); + virtual string UINT(); + virtual string PTR_CONST(); + virtual string CTRL_FLOW(); + + virtual void writeExports(); +}; + +#endif /* _FSMCODEGEN_H */ diff --git a/contrib/tools/ragel5/rlgen-cd/ftabcodegen.cpp b/contrib/tools/ragel5/rlgen-cd/ftabcodegen.cpp new file mode 100644 index 0000000000..1d65e7102c --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/ftabcodegen.cpp @@ -0,0 +1,405 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Erich Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "rlgen-cd.h" +#include "ftabcodegen.h" +#include "redfsm.h" +#include "gendata.h" + +/* Determine if we should use indicies or not. */ +void FTabCodeGen::calcIndexSize() +{ + int sizeWithInds = 0, sizeWithoutInds = 0; + + /* Calculate cost of using with indicies. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + int totalIndex = st->outSingle.length() + st->outRange.length() + + (st->defTrans == 0 ? 0 : 1); + sizeWithInds += arrayTypeSize(redFsm->maxIndex) * totalIndex; + } + sizeWithInds += arrayTypeSize(redFsm->maxState) * redFsm->transSet.length(); + if ( redFsm->anyActions() ) + sizeWithInds += arrayTypeSize(redFsm->maxActListId) * redFsm->transSet.length(); + + /* Calculate the cost of not using indicies. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + int totalIndex = st->outSingle.length() + st->outRange.length() + + (st->defTrans == 0 ? 0 : 1); + sizeWithoutInds += arrayTypeSize(redFsm->maxState) * totalIndex; + if ( redFsm->anyActions() ) + sizeWithoutInds += arrayTypeSize(redFsm->maxActListId) * totalIndex; + } + + /* If using indicies reduces the size, use them. 
*/ + useIndicies = sizeWithInds < sizeWithoutInds; +} + +std::ostream &FTabCodeGen::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->actListId+1; + out << act; + return out; +} + +std::ostream &FTabCodeGen::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->actListId+1; + out << act; + return out; +} + +std::ostream &FTabCodeGen::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->actListId+1; + out << act; + return out; +} + + +/* Write out the function for a transition. */ +std::ostream &FTabCodeGen::TRANS_ACTION( RedTransAp *trans ) +{ + int action = 0; + if ( trans->action != 0 ) + action = trans->action->actListId+1; + out << action; + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &FTabCodeGen::TO_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numToStateRefs > 0 ) { + /* Write the entry label. */ + out << "\tcase " << redAct->actListId+1 << ":\n"; + + /* Write each action in the list of action items. */ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, false ); + + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &FTabCodeGen::FROM_STATE_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numFromStateRefs > 0 ) { + /* Write the entry label. */ + out << "\tcase " << redAct->actListId+1 << ":\n"; + + /* Write each action in the list of action items. 
*/ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, false ); + + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +std::ostream &FTabCodeGen::EOF_ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numEofRefs > 0 ) { + /* Write the entry label. */ + out << "\tcase " << redAct->actListId+1 << ":\n"; + + /* Write each action in the list of action items. */ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, true ); + + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +/* Write out the function switch. This switch is keyed on the values + * of the func index. */ +std::ostream &FTabCodeGen::ACTION_SWITCH() +{ + /* Loop the actions. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numTransRefs > 0 ) { + /* Write the entry label. */ + out << "\tcase " << redAct->actListId+1 << ":\n"; + + /* Write each action in the list of action items. 
*/ + for ( ActionTable::Iter item = redAct->key; item.lte(); item++ ) + ACTION( out, item->value, 0, false ); + + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +void FTabCodeGen::writeData() +{ + if ( redFsm->anyConditions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondOffset), CO() ); + COND_OFFSETS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondLen), CL() ); + COND_LENS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() ); + COND_KEYS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondSpaceId), C() ); + COND_SPACES(); + CLOSE_ARRAY() << + "\n"; + } + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxKeyOffset), KO() ); + KEY_OFFSETS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( WIDE_ALPH_TYPE(), K() ); + KEYS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxSingleLen), SL() ); + SINGLE_LENS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxRangeLen), RL() ); + RANGE_LENS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxIndexOffset), IO() ); + INDEX_OFFSETS(); + CLOSE_ARRAY() << + "\n"; + + if ( useIndicies ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxIndex), I() ); + INDICIES(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxState), TT() ); + TRANS_TARGS_WI(); + CLOSE_ARRAY() << + "\n"; + + if ( redFsm->anyActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActListId), TA() ); + TRANS_ACTIONS_WI(); + CLOSE_ARRAY() << + "\n"; + } + } + else { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxState), TT() ); + TRANS_TARGS(); + CLOSE_ARRAY() << + "\n"; + + if ( redFsm->anyActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActListId), TA() ); + TRANS_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + } + + if ( redFsm->anyToStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TSA() ); + TO_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( redFsm->anyFromStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), 
FSA() ); + FROM_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( redFsm->anyEofActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActListId), EA() ); + EOF_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + STATE_IDS(); +} + +void FTabCodeGen::writeExec() +{ + outLabelUsed = false; + + out << + " {\n" + " int _klen"; + + if ( redFsm->anyRegCurStateRef() ) + out << ", _ps"; + + out << + ";\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n" + " int _trans;\n"; + + if ( redFsm->anyConditions() ) + out << " " << WIDE_ALPH_TYPE() << " _widec;\n"; + + out << "\n"; + + if ( hasEnd ) { + outLabelUsed = true; + out << + " if ( " << P() << " == " << PE() << " )\n" + " goto _out;\n"; + } + + out << "_resume:\n"; + + if ( redFsm->errState != 0 ) { + outLabelUsed = true; + out << + " if ( " << CS() << " == " << redFsm->errState->id << " )\n" + " goto _out;\n"; + } + + if ( redFsm->anyFromStateActions() ) { + out << + " switch ( " << FSA() << "[" << CS() << "] ) {\n"; + FROM_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + "\n"; + } + + if ( redFsm->anyConditions() ) + COND_TRANSLATE(); + + LOCATE_TRANS(); + + out << "_match:\n"; + + if ( redFsm->anyRegCurStateRef() ) + out << " _ps = " << CS() << ";\n"; + + if ( useIndicies ) + out << " _trans = " << I() << "[_trans];\n"; + + out << + " " << CS() << " = " << TT() << "[_trans];\n" + "\n"; + + if ( redFsm->anyRegActions() ) { + out << + " if ( " << TA() << "[_trans] == 0 )\n" + " goto _again;\n" + "\n" + " switch ( " << TA() << "[_trans] ) {\n"; + ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + "\n"; + } + + if ( redFsm->anyRegActions() || redFsm->anyActionGotos() || + redFsm->anyActionCalls() || redFsm->anyActionRets() ) + out << "_again:\n"; + + if ( redFsm->anyToStateActions() ) { + out << + " switch ( " << TSA() << "[" << CS() << "] ) {\n"; + TO_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + "\n"; + } + + if ( hasEnd ) { + out << + " if ( ++" << P() << " != " << PE() << " )\n" + " goto 
_resume;\n"; + } + else { + out << + " " << P() << " += 1;\n" + " goto _resume;\n"; + } + + + if ( outLabelUsed ) + out << " _out: {}\n"; + + out << " }\n"; +} + + +void FTabCodeGen::writeEOF() +{ + if ( redFsm->anyEofActions() ) { + out << + " {\n" + " switch ( " << EA() << "[" << CS() << "] ) {\n"; + EOF_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + "\n"; + } +} diff --git a/contrib/tools/ragel5/rlgen-cd/ftabcodegen.h b/contrib/tools/ragel5/rlgen-cd/ftabcodegen.h new file mode 100644 index 0000000000..9d26d1cadd --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/ftabcodegen.h @@ -0,0 +1,78 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Erich Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FTABCODEGEN_H +#define _FTABCODEGEN_H + +#include <iostream> +#include "tabcodegen.h" + +/* Forwards. 
*/ +struct CodeGenData; + + +/* + * FTabCodeGen + * + * Derives from TabCodeGen; declares the action-switch emitters and the + * virtual action writers, writeData(), writeEOF(), writeExec() and + * calcIndexSize() that are defined in ftabcodegen.cpp. + */ +class FTabCodeGen : public TabCodeGen +{ +protected: + FTabCodeGen( ostream &out ) : FsmCodeGen(out), TabCodeGen(out) {} + + std::ostream &TO_STATE_ACTION_SWITCH(); + std::ostream &FROM_STATE_ACTION_SWITCH(); + std::ostream &EOF_ACTION_SWITCH(); + std::ostream &ACTION_SWITCH(); + + virtual std::ostream &TO_STATE_ACTION( RedStateAp *state ); + virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state ); + virtual std::ostream &EOF_ACTION( RedStateAp *state ); + virtual std::ostream &TRANS_ACTION( RedTransAp *trans ); + virtual void writeData(); + virtual void writeEOF(); + virtual void writeExec(); + virtual void calcIndexSize(); +}; + + +/* + * CFTabCodeGen: FTabCodeGen combined with the CCodeGen back end. + */ +struct CFTabCodeGen + : public FTabCodeGen, public CCodeGen +{ + CFTabCodeGen( ostream &out ) : + FsmCodeGen(out), FTabCodeGen(out), CCodeGen(out) {} +}; + +/* + * class DFTabCodeGen: FTabCodeGen combined with the DCodeGen back end. + */ +struct DFTabCodeGen + : public FTabCodeGen, public DCodeGen +{ + DFTabCodeGen( ostream &out ) : + FsmCodeGen(out), FTabCodeGen(out), DCodeGen(out) {} +}; + +#endif /* _FTABCODEGEN_H */ diff --git a/contrib/tools/ragel5/rlgen-cd/gotocodegen.cpp b/contrib/tools/ragel5/rlgen-cd/gotocodegen.cpp new file mode 100644 index 0000000000..13be67d097 --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/gotocodegen.cpp @@ -0,0 +1,742 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Erich Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "rlgen-cd.h" +#include "gotocodegen.h" +#include "redfsm.h" +#include "bstmap.h" +#include "gendata.h" + +/* Emit the goto to take for a given transition. */ +std::ostream &GotoCodeGen::TRANS_GOTO( RedTransAp *trans, int level ) +{ + out << TABS(level) << "goto tr" << trans->id << ";"; + return out; +} + +std::ostream &GotoCodeGen::TO_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numToStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +std::ostream &GotoCodeGen::FROM_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numFromStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +std::ostream &GotoCodeGen::EOF_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numEofRefs > 0 ) { + /* Write the case label, the action and the case break. 
*/ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, true ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +std::ostream &GotoCodeGen::ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numTransRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +void GotoCodeGen::GOTO_HEADER( RedStateAp *state ) +{ + /* Label the state. */ + out << "case " << state->id << ":\n"; +} + + +void GotoCodeGen::emitSingleSwitch( RedStateAp *state ) +{ + /* Load up the singles. */ + int numSingles = state->outSingle.length(); + RedTransEl *data = state->outSingle.data; + + if ( numSingles == 1 ) { + /* If there is a single single key then write it out as an if. */ + out << "\tif ( " << GET_WIDE_KEY(state) << " == " << + KEY(data[0].lowKey) << " )\n\t\t"; + + /* Virtual function for writing the target of the transition. */ + TRANS_GOTO(data[0].value, 0) << "\n"; + } + else if ( numSingles > 1 ) { + /* Write out single keys in a switch if there is more than one. */ + out << "\tswitch( " << GET_WIDE_KEY(state) << " ) {\n"; + + /* Write out the single indicies. */ + for ( int j = 0; j < numSingles; j++ ) { + out << "\t\tcase " << KEY(data[j].lowKey) << ": "; + TRANS_GOTO(data[j].value, 0) << "\n"; + } + + /* Emits a default case for D code. */ + SWITCH_DEFAULT(); + + /* Close off the transition switch. */ + out << "\t}\n"; + } +} + +void GotoCodeGen::emitRangeBSearch( RedStateAp *state, int level, int low, int high ) +{ + /* Get the mid position, staying on the lower end of the range. */ + int mid = (low + high) >> 1; + RedTransEl *data = state->outRange.data; + + /* Determine if we need to look higher or lower. 
*/ + bool anyLower = mid > low; + bool anyHigher = mid < high; + + /* Determine if the keys at mid are the limits of the alphabet. */ + bool limitLow = data[mid].lowKey == keyOps->minKey; + bool limitHigh = data[mid].highKey == keyOps->maxKey; + + if ( anyLower && anyHigher ) { + /* Can go lower and higher than mid. */ + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + emitRangeBSearch( state, level+1, low, mid-1 ); + out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " > " << + KEY(data[mid].highKey) << " ) {\n"; + emitRangeBSearch( state, level+1, mid+1, high ); + out << TABS(level) << "} else\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else if ( anyLower && !anyHigher ) { + /* Can go lower than mid but not higher. */ + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + emitRangeBSearch( state, level+1, low, mid-1 ); + + /* if the higher is the highest in the alphabet then there is no + * sense testing it. */ + if ( limitHigh ) { + out << TABS(level) << "} else\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else { + out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " <= " << + KEY(data[mid].highKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + } + else if ( !anyLower && anyHigher ) { + /* Can go higher than mid but not lower. */ + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " > " << + KEY(data[mid].highKey) << " ) {\n"; + emitRangeBSearch( state, level+1, mid+1, high ); + + /* If the lower end is the lowest in the alphabet then there is no + * sense testing it. 
*/ + if ( limitLow ) { + out << TABS(level) << "} else\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else { + out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " >= " << + KEY(data[mid].lowKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + } + else { + /* Cannot go higher or lower than mid. It's mid or bust. What + * tests to do depends on limits of alphabet. */ + if ( !limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_WIDE_KEY(state) << " && " << GET_WIDE_KEY(state) << " <= " << + KEY(data[mid].highKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else if ( limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " <= " << + KEY(data[mid].highKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else if ( !limitLow && limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_WIDE_KEY(state) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else { + /* Both high and low are at the limit. No tests to do. */ + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + } +} + +void GotoCodeGen::STATE_GOTO_ERROR() +{ + /* Label the state and bail immediately. 
*/ + outLabelUsed = true; + RedStateAp *state = redFsm->errState; + out << "case " << state->id << ":\n"; + out << " goto _out;\n"; +} + +void GotoCodeGen::COND_TRANSLATE( StateCond *stateCond, int level ) +{ + CondSpace *condSpace = stateCond->condSpace; + out << TABS(level) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(" << + KEY(condSpace->baseKey) << " + (" << GET_KEY() << + " - " << KEY(keyOps->minKey) << "));\n"; + + for ( CondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << TABS(level) << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = ((1 << csi.pos()) * keyOps->alphSize()); + out << " ) _widec += " << condValOffset << ";\n"; + } +} + +void GotoCodeGen::emitCondBSearch( RedStateAp *state, int level, int low, int high ) +{ + /* Get the mid position, staying on the lower end of the range. */ + int mid = (low + high) >> 1; + StateCond **data = state->stateCondVect.data; + + /* Determine if we need to look higher or lower. */ + bool anyLower = mid > low; + bool anyHigher = mid < high; + + /* Determine if the keys at mid are the limits of the alphabet. */ + bool limitLow = data[mid]->lowKey == keyOps->minKey; + bool limitHigh = data[mid]->highKey == keyOps->maxKey; + + if ( anyLower && anyHigher ) { + /* Can go lower and higher than mid. */ + out << TABS(level) << "if ( " << GET_KEY() << " < " << + KEY(data[mid]->lowKey) << " ) {\n"; + emitCondBSearch( state, level+1, low, mid-1 ); + out << TABS(level) << "} else if ( " << GET_KEY() << " > " << + KEY(data[mid]->highKey) << " ) {\n"; + emitCondBSearch( state, level+1, mid+1, high ); + out << TABS(level) << "} else {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else if ( anyLower && !anyHigher ) { + /* Can go lower than mid but not higher. 
*/ + out << TABS(level) << "if ( " << GET_KEY() << " < " << + KEY(data[mid]->lowKey) << " ) {\n"; + emitCondBSearch( state, level+1, low, mid-1 ); + + /* if the higher is the highest in the alphabet then there is no + * sense testing it. */ + if ( limitHigh ) { + out << TABS(level) << "} else {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else { + out << TABS(level) << "} else if ( " << GET_KEY() << " <= " << + KEY(data[mid]->highKey) << " ) {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + } + else if ( !anyLower && anyHigher ) { + /* Can go higher than mid but not lower. */ + out << TABS(level) << "if ( " << GET_KEY() << " > " << + KEY(data[mid]->highKey) << " ) {\n"; + emitCondBSearch( state, level+1, mid+1, high ); + + /* If the lower end is the lowest in the alphabet then there is no + * sense testing it. */ + if ( limitLow ) { + out << TABS(level) << "} else {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else { + out << TABS(level) << "} else if ( " << GET_KEY() << " >= " << + KEY(data[mid]->lowKey) << " ) {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + } + else { + /* Cannot go higher or lower than mid. It's mid or bust. What + * tests to do depends on limits of alphabet. 
*/ + if ( !limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " << + GET_KEY() << " && " << GET_KEY() << " <= " << + KEY(data[mid]->highKey) << " ) {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else if ( limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << GET_KEY() << " <= " << + KEY(data[mid]->highKey) << " ) {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else if ( !limitLow && limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " << + GET_KEY() << " )\n {"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else { + /* Both high and low are at the limit. No tests to do. */ + COND_TRANSLATE(data[mid], level); + } + } +} + +std::ostream &GotoCodeGen::STATE_GOTOS() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st == redFsm->errState ) + STATE_GOTO_ERROR(); + else { + /* Writing code above state gotos. */ + GOTO_HEADER( st ); + + if ( st->stateCondVect.length() > 0 ) { + out << " _widec = " << GET_KEY() << ";\n"; + emitCondBSearch( st, 1, 0, st->stateCondVect.length() - 1 ); + } + + /* Try singles. */ + if ( st->outSingle.length() > 0 ) + emitSingleSwitch( st ); + + /* Default case is to binary search for the ranges, if that fails then */ + if ( st->outRange.length() > 0 ) + emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 ); + + /* Write the default transition. */ + TRANS_GOTO( st->defTrans, 1 ) << "\n"; + } + } + return out; +} + +std::ostream &GotoCodeGen::TRANSITIONS() +{ + /* Emit any transitions that have functions and that go to + * this state. */ + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + /* Write the label for the transition so it can be jumped to. */ + out << " tr" << trans->id << ": "; + + /* Destination state. 
*/ + if ( trans->action != 0 && trans->action->anyCurStateRef() ) + out << "_ps = " << CS() << ";"; + out << CS() << " = " << trans->targ->id << "; "; + + if ( trans->action != 0 ) { + /* Write out the transition func. */ + out << "goto f" << trans->action->actListId << ";\n"; + } + else { + /* No code to execute, just loop around. */ + out << "goto _again;\n"; + } + } + return out; +} + +std::ostream &GotoCodeGen::EXEC_FUNCS() +{ + /* Make labels that set acts and jump to execFuncs. Loop func indicies. */ + for ( ActionTableMap::Iter redAct = redFsm->actionMap; redAct.lte(); redAct++ ) { + if ( redAct->numTransRefs > 0 ) { + out << " f" << redAct->actListId << ": " << + "_acts = " << ARR_OFF(A(), itoa( redAct->location+1 ) ) << ";" + " goto execFuncs;\n"; + } + } + + out << + "\n" + "execFuncs:\n" + " _nacts = *_acts++;\n" + " while ( _nacts-- > 0 ) {\n" + " switch ( *_acts++ ) {\n"; + ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + " goto _again;\n"; + return out; +} + +unsigned int GotoCodeGen::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->location+1; + return act; +} + +unsigned int GotoCodeGen::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->location+1; + return act; +} + +unsigned int GotoCodeGen::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->location+1; + return act; +} + +std::ostream &GotoCodeGen::TO_STATE_ACTIONS() +{ + /* Take one off for the psuedo start state. */ + int numStates = redFsm->stateList.length(); + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + vals[st->id] = TO_STATE_ACTION(st); + + out << "\t"; + for ( int st = 0; st < redFsm->nextStateId; st++ ) { + /* Write any eof action. 
*/ + out << vals[st]; + if ( st < numStates-1 ) { + out << ", "; + if ( (st+1) % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + delete[] vals; + return out; +} + +std::ostream &GotoCodeGen::FROM_STATE_ACTIONS() +{ + /* Take one off for the pseudo start state. */ + int numStates = redFsm->stateList.length(); + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + vals[st->id] = FROM_STATE_ACTION(st); + + out << "\t"; + for ( int st = 0; st < redFsm->nextStateId; st++ ) { + /* Write any from-state action. */ + out << vals[st]; + if ( st < numStates-1 ) { + out << ", "; + if ( (st+1) % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + delete[] vals; + return out; +} + +std::ostream &GotoCodeGen::EOF_ACTIONS() +{ + /* Take one off for the pseudo start state. */ + int numStates = redFsm->stateList.length(); + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + vals[st->id] = EOF_ACTION(st); + + out << "\t"; + for ( int st = 0; st < redFsm->nextStateId; st++ ) { + /* Write any eof action. */ + out << vals[st]; + if ( st < numStates-1 ) { + out << ", "; + if ( (st+1) % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + delete[] vals; + return out; +} + +std::ostream &GotoCodeGen::FINISH_CASES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* States that are final and have an out action need a case. */ + if ( st->eofAction != 0 ) { + /* Write the case label. */ + out << "\t\tcase " << st->id << ": "; + + /* Write the goto func. 
*/ + out << "goto f" << st->eofAction->actListId << ";\n"; + } + } + + return out; +} + +void GotoCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << "{" << CS() << " = " << gotoDest << "; " << + CTRL_FLOW() << "goto _again;}"; +} + +void GotoCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) +{ + ret << "{" << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish ); + ret << "); " << CTRL_FLOW() << "goto _again;}"; +} + +void GotoCodeGen::CURS( ostream &ret, bool inFinish ) +{ + ret << "(_ps)"; +} + +void GotoCodeGen::TARGS( ostream &ret, bool inFinish, int targState ) +{ + ret << "(" << CS() << ")"; +} + +void GotoCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish ) +{ + ret << CS() << " = " << nextDest << ";"; +} + +void GotoCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) +{ + ret << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish ); + ret << ");"; +} + +void GotoCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; " << CS() << " = " << + callDest << "; " << CTRL_FLOW() << "goto _again;}"; +} + +void GotoCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish ) +{ + ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; " << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, targState, inFinish ); + ret << "); " << CTRL_FLOW() << "goto _again;}"; +} + +void GotoCodeGen::RET( ostream &ret, bool inFinish ) +{ + ret << "{" << CS() << " = " << STACK() << "[--" << TOP() << "]; " << + CTRL_FLOW() << "goto _again;}"; +} + +void GotoCodeGen::BREAK( ostream &ret, int targState ) +{ + outLabelUsed = true; + ret << CTRL_FLOW() << "goto _out;"; +} + +void GotoCodeGen::writeData() +{ + if ( redFsm->anyActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActArrItem), A() ); + ACTIONS_ARRAY(); + CLOSE_ARRAY() << + "\n"; + } + + if ( 
redFsm->anyToStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TSA() ); + TO_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( redFsm->anyFromStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), FSA() ); + FROM_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( redFsm->anyEofActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), EA() ); + EOF_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + STATE_IDS(); +} + +void GotoCodeGen::writeExec() +{ + outLabelUsed = false; + + out << " {\n"; + + if ( redFsm->anyRegCurStateRef() ) + out << " int _ps = 0;\n"; + + if ( redFsm->anyToStateActions() || redFsm->anyRegActions() + || redFsm->anyFromStateActions() ) + { + out << + " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxActArrItem) << POINTER() << "_acts;\n" + " " << UINT() << " _nacts;\n"; + } + + if ( redFsm->anyConditions() ) + out << " " << WIDE_ALPH_TYPE() << " _widec;\n"; + + out << "\n"; + + if ( hasEnd ) { + outLabelUsed = true; + out << + " if ( " << P() << " == " << PE() << " )\n" + " goto _out;\n"; + } + + out << "_resume:\n"; + + if ( redFsm->anyFromStateActions() ) { + out << + " _acts = " << ARR_OFF( A(), FSA() + "[" + CS() + "]" ) << ";\n" + " _nacts = " << CAST(UINT()) << " *_acts++;\n" + " while ( _nacts-- > 0 ) {\n" + " switch ( *_acts++ ) {\n"; + FROM_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + "\n"; + } + + out << + " switch ( " << CS() << " ) {\n"; + STATE_GOTOS(); + SWITCH_DEFAULT() << + " }\n" + "\n"; + TRANSITIONS() << + "\n"; + + if ( redFsm->anyRegActions() ) + EXEC_FUNCS() << "\n"; + + out << "_again:\n"; + + if ( redFsm->anyToStateActions() ) { + out << + " _acts = " << ARR_OFF( A(), TSA() + "[" + CS() + "]" ) << ";\n" + " _nacts = " << CAST(UINT()) << " *_acts++;\n" + " while ( _nacts-- > 0 ) {\n" + " switch ( *_acts++ ) {\n"; + TO_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + "\n"; + } + + if ( hasEnd ) { + out << + " if ( ++" << P() << " != " << PE() << " )\n" + 
" goto _resume;\n"; + } + else { + out << + " " << P() << " += 1;\n" + " goto _resume;\n"; + } + + if ( outLabelUsed ) + out << " _out: {}\n"; + + out << " }\n"; +} + +void GotoCodeGen::writeEOF() +{ + if ( redFsm->anyEofActions() ) { + out << + " {\n" + " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxActArrItem) << POINTER() << "_acts = " << + ARR_OFF( A(), EA() + "[" + CS() + "]" ) << ";\n" + " " << UINT() << " _nacts = " << CAST(UINT()) << " *_acts++;\n" + " while ( _nacts-- > 0 ) {\n" + " switch ( *_acts++ ) {\n"; + EOF_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + " }\n" + "\n"; + } +} diff --git a/contrib/tools/ragel5/rlgen-cd/gotocodegen.h b/contrib/tools/ragel5/rlgen-cd/gotocodegen.h new file mode 100644 index 0000000000..625c2c23bd --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/gotocodegen.h @@ -0,0 +1,111 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Erich Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _GOTOCODEGEN_H +#define _GOTOCODEGEN_H + +#include <iostream> +#include "fsmcodegen.h" + +/* Forwards. */ +struct CodeGenData; +struct NameInst; +struct RedTransAp; +struct RedStateAp; +struct StateCond; + +/* + * Goto driven fsm. 
+ */ +class GotoCodeGen : virtual public FsmCodeGen +{ +public: + GotoCodeGen( ostream &out ) : FsmCodeGen(out) {} + std::ostream &TO_STATE_ACTION_SWITCH(); + std::ostream &FROM_STATE_ACTION_SWITCH(); + std::ostream &EOF_ACTION_SWITCH(); + std::ostream &ACTION_SWITCH(); + std::ostream &STATE_GOTOS(); + std::ostream &TRANSITIONS(); + std::ostream &EXEC_FUNCS(); + std::ostream &FINISH_CASES(); + + void GOTO( ostream &ret, int gotoDest, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NEXT( ostream &ret, int nextDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ); + void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ); + void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish ); + void CURS( ostream &ret, bool inFinish ); + void TARGS( ostream &ret, bool inFinish, int targState ); + void RET( ostream &ret, bool inFinish ); + void BREAK( ostream &ret, int targState ); + + virtual unsigned int TO_STATE_ACTION( RedStateAp *state ); + virtual unsigned int FROM_STATE_ACTION( RedStateAp *state ); + virtual unsigned int EOF_ACTION( RedStateAp *state ); + + std::ostream &TO_STATE_ACTIONS(); + std::ostream &FROM_STATE_ACTIONS(); + std::ostream &EOF_ACTIONS(); + + void COND_TRANSLATE( StateCond *stateCond, int level ); + void emitCondBSearch( RedStateAp *state, int level, int low, int high ); + void STATE_CONDS( RedStateAp *state, bool genDefault ); + + virtual std::ostream &TRANS_GOTO( RedTransAp *trans, int level ); + + void emitSingleSwitch( RedStateAp *state ); + void emitRangeBSearch( RedStateAp *state, int level, int low, int high ); + + /* Called from STATE_GOTOS just before writing the gotos */ + virtual void GOTO_HEADER( RedStateAp *state ); + virtual void STATE_GOTO_ERROR(); + + virtual void writeData(); + virtual void writeEOF(); + virtual void writeExec(); +}; + +/* + * class CGotoCodeGen + */ +struct CGotoCodeGen + : public GotoCodeGen, public 
CCodeGen +{ + CGotoCodeGen( ostream &out ) : + FsmCodeGen(out), GotoCodeGen(out), CCodeGen(out) {} +}; + +/* + * class DGotoCodeGen + */ +struct DGotoCodeGen + : public GotoCodeGen, public DCodeGen +{ + DGotoCodeGen( ostream &out ) : + FsmCodeGen(out), GotoCodeGen(out), DCodeGen(out) {} +}; + + +#endif /* _GOTOCODEGEN_H */ diff --git a/contrib/tools/ragel5/rlgen-cd/ipgotocodegen.cpp b/contrib/tools/ragel5/rlgen-cd/ipgotocodegen.cpp new file mode 100644 index 0000000000..ed65be5fe0 --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/ipgotocodegen.cpp @@ -0,0 +1,414 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Erich Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "rlgen-cd.h" +#include "ipgotocodegen.h" +#include "redfsm.h" +#include "gendata.h" +#include "bstmap.h" + +bool IpGotoCodeGen::useAgainLabel() +{ + return redFsm->anyRegActionRets() || + redFsm->anyRegActionByValControl() || + redFsm->anyRegNextStmt(); +} + +void IpGotoCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << "{" << CTRL_FLOW() << "goto st" << gotoDest << ";}"; +} + +void IpGotoCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << "{" << STACK() << "[" << TOP() << "++] = " << targState << + "; " << CTRL_FLOW() << "goto st" << callDest << ";}"; +} + +void IpGotoCodeGen::RET( ostream &ret, bool inFinish ) +{ + ret << "{" << CS() << " = " << STACK() << "[--" << TOP() << "]; " << + CTRL_FLOW() << "goto _again;}"; +} + +void IpGotoCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) +{ + ret << "{" << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish ); + ret << "); " << CTRL_FLOW() << "goto _again;}"; +} + +void IpGotoCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish ) +{ + ret << "{" << STACK() << "[" << TOP() << "++] = " << targState << "; " << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish ); + ret << "); " << CTRL_FLOW() << "goto _again;}"; +} + +void IpGotoCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish ) +{ + ret << CS() << " = " << nextDest << ";"; +} + +void IpGotoCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) +{ + ret << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish ); + ret << ");"; +} + +void IpGotoCodeGen::CURS( ostream &ret, bool inFinish ) +{ + ret << "(_ps)"; +} + +void IpGotoCodeGen::TARGS( ostream &ret, bool inFinish, int targState ) +{ 
+ ret << targState; +} + +void IpGotoCodeGen::BREAK( ostream &ret, int targState ) +{ + ret << CTRL_FLOW() << "goto _out" << targState << ";"; +} + +bool IpGotoCodeGen::IN_TRANS_ACTIONS( RedStateAp *state ) +{ + bool anyWritten = false; + + /* Emit any transitions that have actions and that go to this state. */ + for ( int it = 0; it < state->numInTrans; it++ ) { + RedTransAp *trans = state->inTrans[it]; + if ( trans->action != 0 && trans->labelNeeded ) { + /* Remember that we wrote an action so we know to write the + * line directive for going back to the output. */ + anyWritten = true; + + /* Write the label for the transition so it can be jumped to. */ + out << "tr" << trans->id << ":\n"; + + /* If the action contains a next, then we must preload the current + * state since the action may or may not set it. */ + if ( trans->action->anyNextStmt() ) + out << " " << CS() << " = " << trans->targ->id << ";\n"; + + /* Write each action in the list. */ + for ( ActionTable::Iter item = trans->action->key; item.lte(); item++ ) + ACTION( out, item->value, trans->targ->id, false ); + + /* If the action contains a next then we need to reload, otherwise + * jump directly to the target state. */ + if ( trans->action->anyNextStmt() ) + out << "\tgoto _again;\n"; + else + out << "\tgoto st" << trans->targ->id << ";\n"; + } + } + + return anyWritten; +} + +/* Called from GotoCodeGen::STATE_GOTOS just before writing the gotos for each + * state. */ +void IpGotoCodeGen::GOTO_HEADER( RedStateAp *state ) +{ + bool anyWritten = IN_TRANS_ACTIONS( state ); + + if ( state->labelNeeded ) + out << "st" << state->id << ":\n"; + + if ( state->toStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + anyWritten = true; + for ( ActionTable::Iter item = state->toStateAction->key; item.lte(); item++ ) + ACTION( out, item->value, state->id, false ); + } + + /* Advance and test buffer pos. 
*/ + if ( state->labelNeeded ) { + if ( hasEnd ) { + out << + " if ( ++" << P() << " == " << PE() << " )\n" + " goto _out" << state->id << ";\n"; + } + else { + out << + " " << P() << " += 1;\n"; + } + } + + /* Give the state a switch case. */ + out << "case " << state->id << ":\n"; + + if ( state->fromStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + anyWritten = true; + for ( ActionTable::Iter item = state->fromStateAction->key; item.lte(); item++ ) + ACTION( out, item->value, state->id, false ); + } + + if ( anyWritten ) + genLineDirective( out ); + + /* Record the prev state if necessary. */ + if ( state->anyRegCurStateRef() ) + out << " _ps = " << state->id << ";\n"; +} + +void IpGotoCodeGen::STATE_GOTO_ERROR() +{ + /* In the error state we need to emit some stuff that usually goes into + * the header. */ + RedStateAp *state = redFsm->errState; + bool anyWritten = IN_TRANS_ACTIONS( state ); + + /* No case label needed since we don't switch on the error state. */ + if ( anyWritten ) + genLineDirective( out ); + + if ( state->labelNeeded ) + out << "st" << state->id << ":\n"; + + /* Break out here. */ + out << " goto _out" << state->id << ";\n"; +} + + +/* Emit the goto to take for a given transition. */ +std::ostream &IpGotoCodeGen::TRANS_GOTO( RedTransAp *trans, int level ) +{ + if ( trans->action != 0 ) { + /* Go to the transition which will go to the state. */ + out << TABS(level) << "goto tr" << trans->id << ";"; + } + else { + /* Go directly to the target state. 
*/ + out << TABS(level) << "goto st" << trans->targ->id << ";"; + } + return out; +} + +std::ostream &IpGotoCodeGen::EXIT_STATES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->outNeeded ) { + outLabelUsed = true; + out << " _out" << st->id << ": " << CS() << " = " << + st->id << "; goto _out; \n"; + } + } + return out; +} + +std::ostream &IpGotoCodeGen::AGAIN_CASES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + out << + " case " << st->id << ": goto st" << st->id << ";\n"; + } + return out; +} + +std::ostream &IpGotoCodeGen::FINISH_CASES() +{ + bool anyWritten = false; + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->eofAction != 0 ) { + if ( st->eofAction->eofRefs == 0 ) + st->eofAction->eofRefs = new IntSet; + st->eofAction->eofRefs->insert( st->id ); + } + } + + for ( ActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + if ( act->eofRefs != 0 ) { + for ( IntSet::Iter pst = *act->eofRefs; pst.lte(); pst++ ) + out << " case " << *pst << ": \n"; + + /* Remember that we wrote a trans so we know to write the + * line directive for going back to the output. */ + anyWritten = true; + + /* Write each action in the eof action list. */ + for ( ActionTable::Iter item = act->key; item.lte(); item++ ) + ACTION( out, item->value, STATE_ERR_STATE, true ); + out << "\tbreak;\n"; + } + } + + if ( anyWritten ) + genLineDirective( out ); + return out; +} + +void IpGotoCodeGen::setLabelsNeeded( InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Goto: case InlineItem::Call: { + /* Mark the target as needing a label. */ + item->targState->labelNeeded = true; + break; + } + default: break; + } + + if ( item->children != 0 ) + setLabelsNeeded( item->children ); + } +} + +/* Set up labelNeeded flag for each state. 
*/
+/* Decides, before any code is written, which states get an `stN:` label
+ * (labelNeeded) and which get an `_outN:` exit label (outNeeded).
+ *
+ * labelNeeded: every state when useAgainLabel() is true. Otherwise the flags
+ * are cleared and then set for the error state (when anyLmSwitchError()),
+ * for the target of each transition whose action has no next statement, and
+ * for the targets of goto/call items found by the InlineList overload.
+ *
+ * outNeeded: mirrors labelNeeded when hasEnd is set. Otherwise it is set for
+ * the error state and for targets of transitions whose action has a break.
+ * NOTE(review): the !hasEnd branch never clears outNeeded on other states,
+ * so it relies on whatever value they already hold -- confirm that is
+ * intended when exec is written more than once. */
+void IpGotoCodeGen::setLabelsNeeded()
+{
+	/* If we use the _again label, then we need the _again switch, which uses
+	 * all labels. */
+	if ( useAgainLabel() ) {
+		for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+			st->labelNeeded = true;
+	}
+	else {
+		/* Do not use all labels by default, init all labelNeeded vars to false. */
+		for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+			st->labelNeeded = false;
+
+		if ( redFsm->errState != 0 && redFsm->anyLmSwitchError() )
+			redFsm->errState->labelNeeded = true;
+
+		/* Walk all transitions and set only those that have targs. */
+		for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
+			/* If there is no action with a next statement, then the label will be
+			 * needed. */
+			if ( trans->action == 0 || !trans->action->anyNextStmt() )
+				trans->targ->labelNeeded = true;
+
+			/* Need labels for states that have goto or calls in action code
+			 * invoked on characters (ie, not from out action code). */
+			if ( trans->action != 0 ) {
+				/* Loop the actions. */
+				for ( ActionTable::Iter act = trans->action->key; act.lte(); act++ ) {
+					/* Get the action and walk its tree. */
+					setLabelsNeeded( act->value->inlineList );
+				}
+			}
+		}
+	}
+
+	if ( hasEnd ) {
+		/* With an end pointer every labelled state tests p against pe, so it
+		 * also needs its own exit label. */
+		for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ )
+			st->outNeeded = st->labelNeeded;
+	}
+	else {
+		if ( redFsm->errState != 0 )
+			redFsm->errState->outNeeded = true;
+
+		for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) {
+			/* Any state with a transition in that has a break will need an
+			 * out label. */
+			if ( trans->action != 0 && trans->action->anyBreakStmt() )
+				trans->targ->outNeeded = true;
+		}
+	}
+}
+
+/* Data section for this code style: only what STATE_IDS() writes. */
+void IpGotoCodeGen::writeData()
+{
+	STATE_IDS();
+}
+
+/* Writes the execution block to `out`: an opening brace, the optional _ps and
+ * _widec locals, the initial p == pe test (hasEnd only), the optional _again
+ * re-entry switch, the main switch on the current state filled in by
+ * STATE_GOTOS(), the per-state exit labels and, if any code jumped to it, the
+ * shared _out label. */
+void IpGotoCodeGen::writeExec()
+{
+	/* Must set labels immediately before writing because we may depend on the
+	 * noend write option. */
+	setLabelsNeeded();
+	/* Set again below, and by EXIT_STATES(), whenever a jump to _out is written. */
+	outLabelUsed = false;
+
+	out << " {\n";
+
+	if ( redFsm->anyRegCurStateRef() )
+		out << " int _ps = 0;\n";
+
+	if ( redFsm->anyConditions() )
+		out << " " << WIDE_ALPH_TYPE() << " _widec;\n";
+
+	if ( hasEnd ) {
+		outLabelUsed = true;
+		out <<
+			" if ( " << P() << " == " << PE() << " )\n"
+			" goto _out;\n";
+	}
+
+	if ( useAgainLabel() ) {
+		/* _again dispatches on the current state to that state's label; the
+		 * first pass jumps over it and over the pointer advance to _resume. */
+		out <<
+			" goto _resume;\n"
+			"\n"
+			"_again:\n"
+			" switch ( " << CS() << " ) {\n";
+			AGAIN_CASES() <<
+			" default: break;\n"
+			" }\n"
+			"\n";
+
+		if ( hasEnd ) {
+			outLabelUsed = true;
+			out <<
+				" if ( ++" << P() << " == " << PE() << " )\n"
+				" goto _out;\n";
+		}
+		else {
+			out <<
+				" " << P() << " += 1;\n";
+		}
+
+		out << "_resume:\n";
+	}
+
+	out <<
+		" switch ( " << CS() << " )\n {\n";
+		STATE_GOTOS();
+		SWITCH_DEFAULT() <<
+		" }\n";
+		EXIT_STATES() <<
+		"\n";
+
+	/* Only write the label when something jumps to it. */
+	if ( outLabelUsed )
+		out << " _out: {}\n";
+
+	out <<
+		" }\n";
+}
+
+/* Writes the EOF block: a switch on the current state whose cases come from
+ * FINISH_CASES(). Nothing is written when the machine has no EOF actions. */
+void IpGotoCodeGen::writeEOF()
+{
+	if ( redFsm->anyEofActions() ) {
+		out <<
+			" {\n"
+			" switch ( " << CS() << " ) {\n";
+			FINISH_CASES();
+			SWITCH_DEFAULT() <<
+			" }\n"
+			" }\n"
+			"\n";
+	}
+}
diff --git a/contrib/tools/ragel5/rlgen-cd/ipgotocodegen.h b/contrib/tools/ragel5/rlgen-cd/ipgotocodegen.h
new file mode 100644
index 0000000000..f32678baba
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/ipgotocodegen.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ * 2004 Erich Ocean <eric.ocean@ampede.com>
+ * 2005 Alan West <alan@alanz.com>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _IPGCODEGEN_H
+#define _IPGCODEGEN_H
+
+#include <iostream>
+#include "gotocodegen.h"
+
+/* Forwards. */
+struct CodeGenData;
+
+/*
+ * class IpGotoCodeGen
+ *
+ * Goto-style generator built on GotoCodeGen. main.cpp instantiates its C and
+ * D variants for the GenIpGoto code style (command line option -G2), and
+ * SplitCodeGen derives from it.
+ */
+class IpGotoCodeGen : public GotoCodeGen
+{
+public:
+	/* NOTE(review): FsmCodeGen is named here although it is not a direct
+	 * base, which C++ only permits for a virtual base -- presumably
+	 * GotoCodeGen inherits it virtually; confirm in gotocodegen.h. */
+	IpGotoCodeGen( ostream &out ) : FsmCodeGen(out), GotoCodeGen(out) {}
+
+	/* Pieces of generated text; each writes to `out` and returns it. */
+	std::ostream &EXIT_STATES();
+	std::ostream &TRANS_GOTO( RedTransAp *trans, int level );
+	std::ostream &FINISH_CASES();
+	std::ostream &AGAIN_CASES();
+
+	/* Writers for the inline statements of an action; output goes to `ret`. */
+	void GOTO( ostream &ret, int gotoDest, bool inFinish );
+	void CALL( ostream &ret, int callDest, int targState, bool inFinish );
+	void NEXT( ostream &ret, int nextDest, bool inFinish );
+	void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+	void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish );
+	void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish );
+	void RET( ostream &ret, bool inFinish );
+	void CURS( ostream &ret, bool inFinish );
+	void TARGS( ostream &ret, bool inFinish, int targState );
+	void BREAK( ostream &ret, int targState );
+
+	/* Entry points for the data, EOF and exec sections. */
+	virtual void writeData();
+	virtual void writeEOF();
+	virtual void writeExec();
+
+protected:
+	bool useAgainLabel();
+
+	/* Called from GotoCodeGen::STATE_GOTOS just before writing the gotos for
+	 * each state. */
+	bool IN_TRANS_ACTIONS( RedStateAp *state );
+	void GOTO_HEADER( RedStateAp *state );
+	void STATE_GOTO_ERROR();
+
+	/* Set up labelNeeded flag for each state. */
+	void setLabelsNeeded( InlineList *inlineList );
+	void setLabelsNeeded();
+};
+
+
+/*
+ * class CIpGotoCodeGen
+ *
+ * IpGotoCodeGen combined with CCodeGen; created for CCode input.
+ */
+struct CIpGotoCodeGen
+	: public IpGotoCodeGen, public CCodeGen
+{
+	CIpGotoCodeGen( ostream &out ) :
+		FsmCodeGen(out), IpGotoCodeGen(out), CCodeGen(out) {}
+};
+
+/*
+ * class DIpGotoCodeGen
+ *
+ * IpGotoCodeGen combined with DCodeGen; created for DCode input.
+ */
+struct DIpGotoCodeGen
+	: public IpGotoCodeGen, public DCodeGen
+{
+	DIpGotoCodeGen( ostream &out ) :
+		FsmCodeGen(out), IpGotoCodeGen(out), DCodeGen(out) {}
+};
+
+
+#endif /* _IPGCODEGEN_H */
diff --git a/contrib/tools/ragel5/rlgen-cd/main.cpp b/contrib/tools/ragel5/rlgen-cd/main.cpp
new file mode 100644
index 0000000000..cabe4bd97d
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/main.cpp
@@ -0,0 +1,394 @@
+/*
+ * Copyright 2001-2007 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#ifndef _WIN32
+# include <unistd.h>
+#endif
+
+#include "common.h"
+#include "rlgen-cd.h"
+#include "xmlparse.h"
+#include "pcheck.h"
+#include "vector.h"
+#include "version.h"
+
+/* Code generators.
*/
+#include "tabcodegen.h"
+#include "ftabcodegen.h"
+#include "flatcodegen.h"
+#include "fflatcodegen.h"
+#include "gotocodegen.h"
+#include "fgotocodegen.h"
+#include "ipgotocodegen.h"
+#include "splitcodegen.h"
+
+using std::istream;
+using std::ifstream;
+using std::ostream;
+using std::ios;
+using std::cin;
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/* Code style chosen on the command line; table driven unless overridden. */
+CodeStyleEnum codeStyle = GenTables;
+
+/* Input and output streams, the output filter and the output file name. */
+istream *inStream = 0;
+ostream *outStream = 0;
+output_filter *outFilter = 0;
+char *outputFileName = 0;
+
+/* Graphviz dot file generation. */
+bool graphvizDone = false;
+
+/* Remaining option values. */
+int numSplitPartitions = 0;
+bool noLineDirectives = false;
+bool printPrintables = false;
+
+/* Write the option summary to standard output. */
+void usage()
+{
+	static const char summary[] =
+"usage: " PROGNAME " [options] file\n"
+"general:\n"
+" -h, -H, -?, --help Print this usage and exit\n"
+" -v, --version Print version information and exit\n"
+" -o <file> Write output to <file>\n"
+"code generation options:\n"
+" -l Inhibit writing of #line directives\n"
+"generated code style:\n"
+" -T0 Table driven FSM (default)\n"
+" -T1 Faster table driven FSM\n"
+" -F0 Flat table driven FSM\n"
+" -F1 Faster flat table-driven FSM\n"
+" -G0 Goto-driven FSM\n"
+" -G1 Faster goto-driven FSM\n"
+" -G2 Really fast goto-driven FSM\n"
+" -P<N> N-Way Split really fast goto-driven FSM\n"
+	;
+	cout << summary;
+}
+
+/* Write the program name, version, date and copyright to standard output. */
+void version()
+{
+	cout << "Ragel Code Generator for C, C++, Objective-C and D" << endl;
+	cout << "Version " VERSION << ", " PUBDATE << endl;
+	cout << "Copyright (c) 2001-2007 by Adrian Thurston" << endl;
+}
+
+/* Number of errors reported through error() so far. */
+int gblErrorCount = 0;
+
+/* Count one more error and hand back cerr, already prefixed with the program
+ * name, so the caller can append the message. */
+ostream &error()
+{
+	++gblErrorCount;
+	return cerr << PROGNAME ": ";
+}
+
+/*
+ * Callbacks invoked by the XML data parser.
+ */
+
+/* Invoked by the parser when the root element is opened.
*/
+/* Selects and opens the destination for the generated code.
+ *
+ * inputFile is used to derive a default output name when none was given
+ * (".h" for a ".rh" input, otherwise ".c" or ".d" by host language) and is
+ * compared against the output name so the two can never be the same file.
+ *
+ * Returns the stream to write to, which is also stored in outStream. Exits
+ * with status 1 if the host language is not C or D, if the output would
+ * overwrite the input, or if the output file cannot be opened. */
+ostream *openOutput( char *inputFile )
+{
+	if ( hostLangType != CCode && hostLangType != DCode ) {
+		error() << "this code generator is for C and D only" << endl;
+		exit(1);
+	}
+
+	/* If the output format is code and no output file name is given, then
+	 * make a default. */
+	if ( outputFileName == 0 ) {
+		char *ext = findFileExtension( inputFile );
+		if ( ext != 0 && strcmp( ext, ".rh" ) == 0 )
+			outputFileName = fileNameFromStem( inputFile, ".h" );
+		else {
+			const char *defExtension = 0;
+			switch ( hostLangType ) {
+				case CCode: defExtension = ".c"; break;
+				case DCode: defExtension = ".d"; break;
+				default: break;
+			}
+			outputFileName = fileNameFromStem( inputFile, defExtension );
+		}
+	}
+
+	/* Make sure we are not writing to the same file as the input file. This
+	 * has to stop here: carrying on would truncate that file when the output
+	 * is opened below, and main() unlinks the output file whenever errors
+	 * were counted. */
+	if ( outputFileName != 0 && strcmp( inputFile, outputFileName ) == 0 ) {
+		error() << "output file \"" << outputFileName <<
+				"\" is the same as the input file" << endl;
+		exit(1);
+	}
+
+	if ( outputFileName != 0 ) {
+		/* Create the filter on the output and open it. */
+		outFilter = new output_filter( outputFileName );
+		outFilter->open( outputFileName, ios::out|ios::trunc );
+		if ( !outFilter->is_open() ) {
+			error() << "error opening " << outputFileName << " for writing" << endl;
+			exit(1);
+		}
+
+		/* Open the output stream, attaching it to the filter. */
+		outStream = new ostream( outFilter );
+	}
+	else {
+		/* Writing out to std out. */
+		outStream = &cout;
+	}
+	return outStream;
+}
+
+/* Invoked by the parser when a ragel definition is opened.
*/
+/* Creates the code generator matching hostLangType and codeStyle, writing to
+ * `out`, and records the source file name, machine name and wantComplete
+ * flag on it. Exits with status 1 if no generator matches; the returned
+ * pointer is never null. */
+CodeGenData *makeCodeGen( char *sourceFileName, char *fsmName,
+		ostream &out, bool wantComplete )
+{
+	CodeGenData *codeGen = 0;
+	switch ( hostLangType ) {
+	case CCode:
+		switch ( codeStyle ) {
+		case GenTables:
+			codeGen = new CTabCodeGen(out);
+			break;
+		case GenFTables:
+			codeGen = new CFTabCodeGen(out);
+			break;
+		case GenFlat:
+			codeGen = new CFlatCodeGen(out);
+			break;
+		case GenFFlat:
+			codeGen = new CFFlatCodeGen(out);
+			break;
+		case GenGoto:
+			codeGen = new CGotoCodeGen(out);
+			break;
+		case GenFGoto:
+			codeGen = new CFGotoCodeGen(out);
+			break;
+		case GenIpGoto:
+			codeGen = new CIpGotoCodeGen(out);
+			break;
+		case GenSplit:
+			codeGen = new CSplitCodeGen(out);
+			break;
+		}
+		break;
+
+	case DCode:
+		switch ( codeStyle ) {
+		case GenTables:
+			codeGen = new DTabCodeGen(out);
+			break;
+		case GenFTables:
+			codeGen = new DFTabCodeGen(out);
+			break;
+		case GenFlat:
+			codeGen = new DFlatCodeGen(out);
+			break;
+		case GenFFlat:
+			codeGen = new DFFlatCodeGen(out);
+			break;
+		case GenGoto:
+			codeGen = new DGotoCodeGen(out);
+			break;
+		case GenFGoto:
+			codeGen = new DFGotoCodeGen(out);
+			break;
+		case GenIpGoto:
+			codeGen = new DIpGotoCodeGen(out);
+			break;
+		case GenSplit:
+			codeGen = new DSplitCodeGen(out);
+			break;
+		}
+		break;
+
+	default: break;
+	}
+
+	/* Neither switch produced a generator. Report it rather than writing
+	 * through a null pointer below. */
+	if ( codeGen == 0 ) {
+		error() << "no code generator for the requested host language "
+				"and code style" << endl;
+		exit(1);
+	}
+
+	codeGen->sourceFileName = sourceFileName;
+	codeGen->fsmName = fsmName;
+	codeGen->wantComplete = wantComplete;
+
+	return codeGen;
+}
+
+
+
+/* Main, process args and call xml_parse to read the input. Returns 0 on
+ * success; every failure path exits with status 1. */
+int main(int argc, char **argv)
+{
+	ParamCheck pc("-:Hh?vlo:T:F:G:P:", argc, argv);
+	const char *xmlInputFileName = 0;
+
+	while ( pc.check() ) {
+		switch ( pc.state ) {
+		case ParamCheck::match:
+			switch ( pc.parameter ) {
+			/* Output. */
+			case 'o':
+				if ( *pc.parameterArg == 0 )
+					error() << "a zero length output file name was given" << endl;
+				else if ( outputFileName != 0 )
+					error() << "more than one output file name was given" << endl;
+				else {
+					/* Ok, remember the output file name. */
+					outputFileName = pc.parameterArg;
+				}
+				break;
+
+			case 'l':
+				noLineDirectives = true;
+				break;
+
+			/* Code style. */
+			case 'T':
+				if ( pc.parameterArg[0] == '0' )
+					codeStyle = GenTables;
+				else if ( pc.parameterArg[0] == '1' )
+					codeStyle = GenFTables;
+				else {
+					error() << "-T" << pc.parameterArg[0] <<
+							" is an invalid argument" << endl;
+					exit(1);
+				}
+				break;
+			case 'F':
+				if ( pc.parameterArg[0] == '0' )
+					codeStyle = GenFlat;
+				else if ( pc.parameterArg[0] == '1' )
+					codeStyle = GenFFlat;
+				else {
+					error() << "-F" << pc.parameterArg[0] <<
+							" is an invalid argument" << endl;
+					exit(1);
+				}
+				break;
+			case 'G':
+				if ( pc.parameterArg[0] == '0' )
+					codeStyle = GenGoto;
+				else if ( pc.parameterArg[0] == '1' )
+					codeStyle = GenFGoto;
+				else if ( pc.parameterArg[0] == '2' )
+					codeStyle = GenIpGoto;
+				else {
+					error() << "-G" << pc.parameterArg[0] <<
+							" is an invalid argument" << endl;
+					exit(1);
+				}
+				break;
+			case 'P': {
+				/* strtol rather than atoi, so that an argument with no
+				 * leading number, or a count below one, is rejected
+				 * instead of silently becoming the partition count.
+				 * Trailing characters are ignored, as atoi did. */
+				char *numEnd = 0;
+				long parts = strtol( pc.parameterArg, &numEnd, 10 );
+				if ( numEnd == pc.parameterArg || parts < 1 ||
+						parts != (long)(int)parts )
+				{
+					error() << "-P" << pc.parameterArg <<
+							" is an invalid argument" << endl;
+					exit(1);
+				}
+				codeStyle = GenSplit;
+				numSplitPartitions = (int)parts;
+				break;
+			}
+
+			/* Version and help. */
+			case 'v':
+				version();
+				exit(0);
+			case 'H': case 'h': case '?':
+				usage();
+				exit(0);
+			case '-':
+				if ( strcasecmp(pc.parameterArg, "help") == 0 ) {
+					usage();
+					exit(0);
+				}
+				else if ( strcasecmp(pc.parameterArg, "version") == 0 ) {
+					version();
+					exit(0);
+				}
+				else {
+					error() << "--" << pc.parameterArg <<
+							" is an invalid argument" << endl;
+					break;
+				}
+			}
+			break;
+
+		case ParamCheck::invalid:
+			error() << "-" << pc.parameter << " is an invalid argument" << endl;
+			break;
+
+		case ParamCheck::noparam:
+			if ( *pc.curArg == 0 )
+				error() << "a zero length input file name was given" << endl;
+			else if ( xmlInputFileName != 0 )
+				error() << "more than one input file name was given" << endl;
+			else {
+				/* OK, Remember the filename. */
+				xmlInputFileName = pc.curArg;
+			}
+			break;
+		}
+	}
+
+	/* Bail on above errors. */
+	if ( gblErrorCount > 0 )
+		exit(1);
+
+	/* Read from the named file, or from standard input when none was given. */
+	if ( xmlInputFileName != 0 ) {
+		/* Open the input file for reading. */
+		ifstream *inFile = new ifstream( xmlInputFileName );
+		inStream = inFile;
+		if ( ! inFile->is_open() )
+			error() << "could not open " << xmlInputFileName << " for reading" << endl;
+	}
+	else {
+		xmlInputFileName = "<stdin>";
+		inStream = &cin;
+	}
+
+	/* Bail on above errors. */
+	if ( gblErrorCount > 0 )
+		exit(1);
+
+	bool wantComplete = true;
+	bool outputActive = true;
+
+	/* Parse the input! */
+	xml_parse( *inStream, xmlInputFileName, outputActive, wantComplete );
+
+	/* Remember whether an output file was really opened before the pointers
+	 * are deleted. outputFileName alone does not say so: it is also set by
+	 * -o when the parser never got as far as opening the output. */
+	bool outputOpened = outFilter != 0;
+
+	/* If writing to a file, delete the ostream, causing it to flush.
+	 * Standard out is flushed automatically. */
+	if ( outputFileName != 0 ) {
+		delete outStream;
+		delete outFilter;
+	}
+
+	/* Finished, final check for errors.. */
+	if ( gblErrorCount > 0 ) {
+		/* If we opened an output file, remove it. A file named with -o that
+		 * was never opened is left alone. */
+		if ( outputOpened && outputFileName != 0 )
+			unlink( outputFileName );
+		exit(1);
+	}
+	return 0;
+}
diff --git a/contrib/tools/ragel5/rlgen-cd/rlgen-cd.h b/contrib/tools/ragel5/rlgen-cd/rlgen-cd.h
new file mode 100644
index 0000000000..93acd99bae
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/rlgen-cd.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Ragel; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _RLCODEGEN_H
+#define _RLCODEGEN_H
+
+#include <stdio.h>
+#include <iostream>
+#include "avltree.h"
+#include "vector.h"
+#include "config.h"
+
+/* Program name used as the prefix of error messages and in the usage text. */
+#define PROGNAME "rlgen-cd"
+
+/* Target output style. The option noted beside each value is the command
+ * line switch that main.cpp maps to it. */
+enum CodeStyleEnum
+{
+	GenTables,   /* -T0, the default */
+	GenFTables,  /* -T1 */
+	GenFlat,     /* -F0 */
+	GenFFlat,    /* -F1 */
+	GenGoto,     /* -G0 */
+	GenFGoto,    /* -G1 */
+	GenIpGoto,   /* -G2 */
+	GenSplit     /* -P<N> */
+};
+
+/* Selected code style; defined in main.cpp. */
+extern CodeStyleEnum codeStyle;
+
+
+/* Graphviz dot file generation flag; defined in main.cpp. */
+extern bool graphvizDone;
+
+/* Number of errors reported through error(). */
+extern int gblErrorCount;
+
+/* Options. */
+extern int numSplitPartitions;	/* value given with -P */
+extern bool noLineDirectives;	/* set by -l */
+
+/* Counts an error and returns cerr with the program name already written. */
+std::ostream &error();
+
+#endif /* _RLCODEGEN_H */
diff --git a/contrib/tools/ragel5/rlgen-cd/splitcodegen.cpp b/contrib/tools/ragel5/rlgen-cd/splitcodegen.cpp
new file mode 100644
index 0000000000..d703b37eea
--- /dev/null
+++ b/contrib/tools/ragel5/rlgen-cd/splitcodegen.cpp
@@ -0,0 +1,521 @@
+/*
+ * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca>
+ */
+
+/* This file is part of Ragel.
+ *
+ * Ragel is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Ragel is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include "rlgen-cd.h" +#include "splitcodegen.h" +#include "gendata.h" +#include <assert.h> + +using std::ostream; +using std::ios; +using std::endl; + +/* Emit the goto to take for a given transition. */ +std::ostream &SplitCodeGen::TRANS_GOTO( RedTransAp *trans, int level ) +{ + if ( trans->targ->partition == currentPartition ) { + if ( trans->action != 0 ) { + /* Go to the transition which will go to the state. */ + out << TABS(level) << "goto tr" << trans->id << ";"; + } + else { + /* Go directly to the target state. */ + out << TABS(level) << "goto st" << trans->targ->id << ";"; + } + } + else { + if ( trans->action != 0 ) { + /* Go to the transition which will go to the state. */ + out << TABS(level) << "goto ptr" << trans->id << ";"; + trans->partitionBoundary = true; + } + else { + /* Go directly to the target state. */ + out << TABS(level) << "goto pst" << trans->targ->id << ";"; + trans->targ->partitionBoundary = true; + } + } + return out; +} + +/* Called from before writing the gotos for each state. */ +void SplitCodeGen::GOTO_HEADER( RedStateAp *state, bool stateInPartition ) +{ + bool anyWritten = IN_TRANS_ACTIONS( state ); + + if ( state->labelNeeded ) + out << "st" << state->id << ":\n"; + + if ( state->toStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + anyWritten = true; + for ( ActionTable::Iter item = state->toStateAction->key; item.lte(); item++ ) + ACTION( out, item->value, state->id, false ); + } + + /* Advance and test buffer pos. */ + if ( state->labelNeeded ) { + if ( hasEnd ) { + out << + " if ( ++" << P() << " == " << PE() << " )\n" + " goto _out" << state->id << ";\n"; + } + else { + out << + " " << P() << " += 1;\n"; + } + } + + /* Give the state a switch case. 
*/ + out << "case " << state->id << ":\n"; + + if ( state->fromStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + anyWritten = true; + for ( ActionTable::Iter item = state->fromStateAction->key; item.lte(); item++ ) + ACTION( out, item->value, state->id, false ); + } + + if ( anyWritten ) + genLineDirective( out ); + + /* Record the prev state if necessary. */ + if ( state->anyRegCurStateRef() ) + out << " _ps = " << state->id << ";\n"; +} + +std::ostream &SplitCodeGen::STATE_GOTOS( int partition ) +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->partition == partition ) { + if ( st == redFsm->errState ) + STATE_GOTO_ERROR(); + else { + /* We call into the base of the goto which calls back into us + * using virtual functions. Set the current partition rather + * than coding parameter passing throughout. */ + currentPartition = partition; + + /* Writing code above state gotos. */ + GOTO_HEADER( st, st->partition == partition ); + + if ( st->stateCondVect.length() > 0 ) { + out << " _widec = " << GET_KEY() << ";\n"; + emitCondBSearch( st, 1, 0, st->stateCondVect.length() - 1 ); + } + + /* Try singles. */ + if ( st->outSingle.length() > 0 ) + emitSingleSwitch( st ); + + /* Default case is to binary search for the ranges, if that fails then */ + if ( st->outRange.length() > 0 ) + emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 ); + + /* Write the default transition. */ + TRANS_GOTO( st->defTrans, 1 ) << "\n"; + } + } + } + return out; +} + + +std::ostream &SplitCodeGen::PART_TRANS( int partition ) +{ + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + if ( trans->partitionBoundary ) { + out << + "ptr" << trans->id << ":\n"; + + if ( trans->action != 0 ) { + /* If the action contains a next, then we must preload the current + * state since the action may or may not set it. 
*/ + if ( trans->action->anyNextStmt() ) + out << " " << CS() << " = " << trans->targ->id << ";\n"; + + /* Write each action in the list. */ + for ( ActionTable::Iter item = trans->action->key; item.lte(); item++ ) + ACTION( out, item->value, trans->targ->id, false ); + } + + out << + " goto pst" << trans->targ->id << ";\n"; + trans->targ->partitionBoundary = true; + } + } + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->partitionBoundary ) { + out << + " pst" << st->id << ":\n" + " " << CS() << " = " << st->id << ";\n"; + + if ( st->toStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + for ( ActionTable::Iter item = st->toStateAction->key; item.lte(); item++ ) + ACTION( out, item->value, st->id, false ); + genLineDirective( out ); + } + + ptOutLabelUsed = true; + out << " goto _pt_out; \n"; + } + } + return out; +} + +std::ostream &SplitCodeGen::EXIT_STATES( int partition ) +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st->partition == partition && st->outNeeded ) { + outLabelUsed = true; + out << " _out" << st->id << ": " << CS() << " = " << + st->id << "; goto _out; \n"; + } + } + return out; +} + + +std::ostream &SplitCodeGen::PARTITION( int partition ) +{ + outLabelUsed = false; + ptOutLabelUsed = false; + + /* Initialize the partition boundaries, which get set during the writing + * of states. 
After the state writing we will */ + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + trans->partitionBoundary = false; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->partitionBoundary = false; + + out << " " << ALPH_TYPE() << " *p = *_pp, *pe = *_ppe;\n"; + + if ( redFsm->anyRegCurStateRef() ) + out << " int _ps = 0;\n"; + + if ( redFsm->anyConditions() ) + out << " " << WIDE_ALPH_TYPE() << " _widec;\n"; + + if ( useAgainLabel() ) { + out << + " goto _resume;\n" + "\n" + "_again:\n" + " switch ( " << CS() << " ) {\n"; + AGAIN_CASES() << + " default: break;\n" + " }\n" + "\n"; + + + if ( hasEnd ) { + outLabelUsed = true; + out << + " if ( ++" << P() << " == " << PE() << " )\n" + " goto _out;\n"; + + } + else { + out << + " " << P() << " += 1;\n"; + } + + out << + "_resume:\n"; + } + + out << + " switch ( " << CS() << " )\n {\n"; + STATE_GOTOS( partition ); + SWITCH_DEFAULT() << + " }\n"; + PART_TRANS( partition ); + EXIT_STATES( partition ); + + if ( outLabelUsed ) { + out << + "\n" + " _out:\n" + " *_pp = p;\n" + " *_ppe = pe;\n" + " return 0;\n"; + } + + if ( ptOutLabelUsed ) { + out << + "\n" + " _pt_out:\n" + " *_pp = p;\n" + " *_ppe = pe;\n" + " return 1;\n"; + } + + return out; +} + +std::ostream &SplitCodeGen::PART_MAP() +{ + int *partMap = new int[redFsm->stateList.length()]; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + partMap[st->id] = st->partition; + + out << "\t"; + int totalItem = 0; + for ( int i = 0; i < redFsm->stateList.length(); i++ ) { + out << partMap[i]; + if ( i != redFsm->stateList.length() - 1 ) { + out << ", "; + if ( ++totalItem % IALL == 0 ) + out << "\n\t"; + } + } + + delete[] partMap; + return out; +} + +void SplitCodeGen::writeData() +{ + out << + "static const int " << START() << " = " << START_STATE_ID() << ";\n" + "\n"; + + if ( writeFirstFinal ) { + out << + "static const int " << FIRST_FINAL() << " = " << FIRST_FINAL_STATE() << ";\n" + "\n"; + } + + if 
( writeErr ) { + out << + "static const int " << ERROR() << " = " << ERROR_STATE() << ";\n" + "\n"; + } + + + OPEN_ARRAY( ARRAY_TYPE(numSplitPartitions), PM() ); + PART_MAP(); + CLOSE_ARRAY() << + "\n"; + + for ( int p = 0; p < redFsm->nParts; p++ ) { + out << "int partition" << p << "( " << ALPH_TYPE() << " **_pp, " << ALPH_TYPE() << + " **_ppe, struct " << FSM_NAME() << " *fsm );\n"; + } + out << "\n"; +} + +std::ostream &SplitCodeGen::ALL_PARTITIONS() +{ + /* compute the format string. */ + int width = 0, high = redFsm->nParts - 1; + while ( high > 0 ) { + width++; + high /= 10; + } + assert( width <= 8 ); + char suffFormat[] = "_%6.6d.c"; + suffFormat[2] = suffFormat[4] = ( '0' + width ); + + for ( int p = 0; p < redFsm->nParts; p++ ) { + char suffix[10]; + sprintf( suffix, suffFormat, p ); + char *fn = fileNameFromStem( sourceFileName, suffix ); + char *include = fileNameFromStem( sourceFileName, ".h" ); + + /* Create the filter on the output and open it. */ + output_filter *partFilter = new output_filter( fn ); + partFilter->open( fn, ios::out|ios::trunc ); + if ( !partFilter->is_open() ) { + error() << "error opening " << fn << " for writing" << endl; + exit(1); + } + + /* Attach the new file to the output stream. */ + std::streambuf *prev_rdbuf = out.rdbuf( partFilter ); + + out << + "#include \"" << include << "\"\n" + "int partition" << p << "( " << ALPH_TYPE() << " **_pp, " << ALPH_TYPE() << + " **_ppe, struct " << FSM_NAME() << " *fsm )\n" + "{\n"; + PARTITION( p ) << + "}\n\n"; + out.flush(); + + /* Fix the output stream. */ + out.rdbuf( prev_rdbuf ); + } + return out; +} + + +void SplitCodeGen::writeExec() +{ + /* Must set labels immediately before writing because we may depend on the + * noend write option. 
*/ + setLabelsNeeded(); + out << + " {\n" + " int _stat = 0;\n"; + + if ( hasEnd ) { + out << + " if ( " << P() << " == " << PE() << " )\n" + " goto _out;\n"; + } + + out << " goto _resume;\n"; + + /* In this reentry, to-state actions have already been executed on the + * partition-switch exit from the last partition. */ + out << "_reenter:\n"; + + if ( hasEnd ) { + out << + " if ( ++" << P() << " == " << PE() << " )\n" + " goto _out;\n"; + } + else { + out << + " " << P() << " += 1;\n"; + } + + out << "_resume:\n"; + + out << + " switch ( " << PM() << "[" << CS() << "] ) {\n"; + for ( int p = 0; p < redFsm->nParts; p++ ) { + out << + " case " << p << ":\n" + " _stat = partition" << p << "( &p, &pe, fsm );\n" + " break;\n"; + } + out << + " }\n" + " if ( _stat )\n" + " goto _reenter;\n"; + + if ( hasEnd ) + out << " _out: {}\n"; + + out << + " }\n"; + + ALL_PARTITIONS(); +} + +void SplitCodeGen::setLabelsNeeded( RedStateAp *fromState, InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Goto: case InlineItem::Call: { + /* In split code gen we only need labels for transitions across + * partitions. */ + if ( fromState->partition == item->targState->partition ){ + /* Mark the target as needing a label. */ + item->targState->labelNeeded = true; + } + break; + } + default: break; + } + + if ( item->children != 0 ) + setLabelsNeeded( fromState, item->children ); + } +} + +void SplitCodeGen::setLabelsNeeded( RedStateAp *fromState, RedTransAp *trans ) +{ + /* In the split code gen we don't need labels for transitions across + * partitions. */ + if ( fromState->partition == trans->targ->partition ) { + /* If there is no action with a next statement, then the label will be + * needed. 
*/ + trans->labelNeeded = true; + if ( trans->action == 0 || !trans->action->anyNextStmt() ) + trans->targ->labelNeeded = true; + } + + /* Need labels for states that have goto or calls in action code + * invoked on characters (ie, not from out action code). */ + if ( trans->action != 0 ) { + /* Loop the actions. */ + for ( ActionTable::Iter act = trans->action->key; act.lte(); act++ ) { + /* Get the action and walk it's tree. */ + setLabelsNeeded( fromState, act->value->inlineList ); + } + } +} + +/* Set up labelNeeded flag for each state. */ +void SplitCodeGen::setLabelsNeeded() +{ + /* If we use the _again label, then we the _again switch, which uses all + * labels. */ + if ( useAgainLabel() ) { + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->labelNeeded = true; + } + else { + /* Do not use all labels by default, init all labelNeeded vars to false. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->labelNeeded = false; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + trans->labelNeeded = false; + + if ( redFsm->errState != 0 && redFsm->anyLmSwitchError() ) + redFsm->errState->labelNeeded = true; + + /* Walk all transitions and set only those that have targs. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + for ( RedTransList::Iter tel = st->outRange; tel.lte(); tel++ ) + setLabelsNeeded( st, tel->value ); + + for ( RedTransList::Iter tel = st->outSingle; tel.lte(); tel++ ) + setLabelsNeeded( st, tel->value ); + + if ( st->defTrans != 0 ) + setLabelsNeeded( st, st->defTrans ); + } + } + + if ( hasEnd ) { + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->outNeeded = st->labelNeeded; + } + else { + if ( redFsm->errState != 0 ) + redFsm->errState->outNeeded = true; + + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + /* Any state with a transition in that has a break will need an + * out label. 
*/ + if ( trans->action != 0 && trans->action->anyBreakStmt() ) + trans->targ->outNeeded = true; + } + } +} + diff --git a/contrib/tools/ragel5/rlgen-cd/splitcodegen.h b/contrib/tools/ragel5/rlgen-cd/splitcodegen.h new file mode 100644 index 0000000000..82fc37150e --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/splitcodegen.h @@ -0,0 +1,71 @@ +/* + * Copyright 2006 Adrian Thurston <thurston@cs.queensu.ca> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _SPLITCODEGEN_H +#define _SPLITCODEGEN_H + +#include "ipgotocodegen.h" + +class SplitCodeGen : public IpGotoCodeGen +{ +public: + SplitCodeGen( ostream &out ) : FsmCodeGen(out), IpGotoCodeGen(out) {} + + bool ptOutLabelUsed; + + std::ostream &PART_MAP(); + std::ostream &EXIT_STATES( int partition ); + std::ostream &PART_TRANS( int partition ); + std::ostream &TRANS_GOTO( RedTransAp *trans, int level ); + void GOTO_HEADER( RedStateAp *state, bool stateInPartition ); + std::ostream &STATE_GOTOS( int partition ); + std::ostream &PARTITION( int partition ); + std::ostream &ALL_PARTITIONS(); + void writeData(); + void writeExec(); + void writeParts(); + + void setLabelsNeeded( RedStateAp *fromState, InlineList *inlineList ); + void setLabelsNeeded( RedStateAp *fromState, RedTransAp *trans ); + void setLabelsNeeded(); + + 
int currentPartition; +}; + +struct CSplitCodeGen + : public SplitCodeGen, public CCodeGen +{ + CSplitCodeGen( ostream &out ) : + FsmCodeGen(out), SplitCodeGen(out), CCodeGen(out) {} +}; + +/* + * class DIpGotoCodeGen + */ +struct DSplitCodeGen + : public SplitCodeGen, public DCodeGen +{ + DSplitCodeGen( ostream &out ) : + FsmCodeGen(out), SplitCodeGen(out), DCodeGen(out) {} +}; + + +#endif /* _SPLITCODEGEN_H */ diff --git a/contrib/tools/ragel5/rlgen-cd/tabcodegen.cpp b/contrib/tools/ragel5/rlgen-cd/tabcodegen.cpp new file mode 100644 index 0000000000..22f09534b2 --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/tabcodegen.cpp @@ -0,0 +1,988 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Erich Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "rlgen-cd.h" +#include "tabcodegen.h" +#include "redfsm.h" +#include "gendata.h" + +/* Determine if we should use indicies or not. */ +void TabCodeGen::calcIndexSize() +{ + int sizeWithInds = 0, sizeWithoutInds = 0; + + /* Calculate cost of using with indicies. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + int totalIndex = st->outSingle.length() + st->outRange.length() + + (st->defTrans == 0 ? 
0 : 1); + sizeWithInds += arrayTypeSize(redFsm->maxIndex) * totalIndex; + } + sizeWithInds += arrayTypeSize(redFsm->maxState) * redFsm->transSet.length(); + if ( redFsm->anyActions() ) + sizeWithInds += arrayTypeSize(redFsm->maxActionLoc) * redFsm->transSet.length(); + + /* Calculate the cost of not using indicies. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + int totalIndex = st->outSingle.length() + st->outRange.length() + + (st->defTrans == 0 ? 0 : 1); + sizeWithoutInds += arrayTypeSize(redFsm->maxState) * totalIndex; + if ( redFsm->anyActions() ) + sizeWithoutInds += arrayTypeSize(redFsm->maxActionLoc) * totalIndex; + } + + /* If using indicies reduces the size, use them. */ + useIndicies = sizeWithInds < sizeWithoutInds; +} + +std::ostream &TabCodeGen::TO_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->location+1; + out << act; + return out; +} + +std::ostream &TabCodeGen::FROM_STATE_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->location+1; + out << act; + return out; +} + +std::ostream &TabCodeGen::EOF_ACTION( RedStateAp *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->location+1; + out << act; + return out; +} + + +std::ostream &TabCodeGen::TRANS_ACTION( RedTransAp *trans ) +{ + /* If there are actions, emit them. Otherwise emit zero. */ + int act = 0; + if ( trans->action != 0 ) + act = trans->action->location+1; + out << act; + return out; +} + +std::ostream &TabCodeGen::TO_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numToStateRefs > 0 ) { + /* Write the case label, the action and the case break. 
*/ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +std::ostream &TabCodeGen::FROM_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numFromStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +std::ostream &TabCodeGen::EOF_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numEofRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, true ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + + +std::ostream &TabCodeGen::ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numTransRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + genLineDirective( out ); + return out; +} + +std::ostream &TabCodeGen::COND_OFFSETS() +{ + out << "\t"; + int totalStateNum = 0, curKeyOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the key offset. */ + out << curKeyOffset; + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + + /* Move the key offset ahead. 
*/ + curKeyOffset += st->stateCondList.length(); + } + out << "\n"; + return out; +} + +std::ostream &TabCodeGen::KEY_OFFSETS() +{ + out << "\t"; + int totalStateNum = 0, curKeyOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the key offset. */ + out << curKeyOffset; + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + + /* Move the key offset ahead. */ + curKeyOffset += st->outSingle.length() + st->outRange.length()*2; + } + out << "\n"; + return out; +} + + +std::ostream &TabCodeGen::INDEX_OFFSETS() +{ + out << "\t"; + int totalStateNum = 0, curIndOffset = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write the index offset. */ + out << curIndOffset; + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + + /* Move the index offset ahead. */ + curIndOffset += st->outSingle.length() + st->outRange.length(); + if ( st->defTrans != 0 ) + curIndOffset += 1; + } + out << "\n"; + return out; +} + +std::ostream &TabCodeGen::COND_LENS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write singles length. */ + out << st->stateCondList.length(); + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + + +std::ostream &TabCodeGen::SINGLE_LENS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write singles length. */ + out << st->outSingle.length(); + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +std::ostream &TabCodeGen::RANGE_LENS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Emit length of range index. 
*/ + out << st->outRange.length(); + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +std::ostream &TabCodeGen::TO_STATE_ACTIONS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write any eof action. */ + TO_STATE_ACTION(st); + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +std::ostream &TabCodeGen::FROM_STATE_ACTIONS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write any eof action. */ + FROM_STATE_ACTION(st); + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +std::ostream &TabCodeGen::EOF_ACTIONS() +{ + out << "\t"; + int totalStateNum = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Write any eof action. */ + EOF_ACTION(st); + if ( !st.last() ) { + out << ", "; + if ( ++totalStateNum % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + +std::ostream &TabCodeGen::COND_KEYS() +{ + out << '\t'; + int totalTrans = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Loop the state's transitions. */ + for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) { + /* Lower key. */ + out << KEY( sc->lowKey ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + + /* Upper key. */ + out << KEY( sc->highKey ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. 
*/ + out << 0 << "\n"; + return out; +} + +std::ostream &TabCodeGen::COND_SPACES() +{ + out << '\t'; + int totalTrans = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Loop the state's transitions. */ + for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) { + /* Cond Space id. */ + out << sc->condSpace->condSpaceId << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. */ + out << 0 << "\n"; + return out; +} + +std::ostream &TabCodeGen::KEYS() +{ + out << '\t'; + int totalTrans = 0; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Loop the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + out << KEY( stel->lowKey ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* Loop the state's transitions. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + /* Lower key. */ + out << KEY( rtel->lowKey ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + + /* Upper key. */ + out << KEY( rtel->highKey ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. */ + out << 0 << "\n"; + return out; +} + +std::ostream &TabCodeGen::INDICIES() +{ + int totalTrans = 0; + out << '\t'; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + out << stel->value->id << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + out << rtel->value->id << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* The state's default index goes next. 
*/ + if ( st->defTrans != 0 ) { + out << st->defTrans->id << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. */ + out << 0 << "\n"; + return out; +} + +std::ostream &TabCodeGen::TRANS_TARGS() +{ + int totalTrans = 0; + out << '\t'; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + out << trans->targ->id << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + out << trans->targ->id << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* The state's default target state. */ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + out << trans->targ->id << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. */ + out << 0 << "\n"; + return out; +} + + +std::ostream &TabCodeGen::TRANS_ACTIONS() +{ + int totalTrans = 0; + out << '\t'; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + /* Walk the singles. */ + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) { + RedTransAp *trans = stel->value; + TRANS_ACTION( trans ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* Walk the ranges. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + RedTransAp *trans = rtel->value; + TRANS_ACTION( trans ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + + /* The state's default index goes next. 
*/ + if ( st->defTrans != 0 ) { + RedTransAp *trans = st->defTrans; + TRANS_ACTION( trans ) << ", "; + if ( ++totalTrans % IALL == 0 ) + out << "\n\t"; + } + } + + /* Output one last number so we don't have to figure out when the last + * entry is and avoid writing a comma. */ + out << 0 << "\n"; + return out; +} + +std::ostream &TabCodeGen::TRANS_TARGS_WI() +{ + /* Transitions must be written ordered by their id. */ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + transPtrs[trans->id] = trans; + + /* Keep a count of the num of items in the array written. */ + out << '\t'; + int totalStates = 0; + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* Write out the target state. */ + RedTransAp *trans = transPtrs[t]; + out << trans->targ->id; + if ( t < redFsm->transSet.length()-1 ) { + out << ", "; + if ( ++totalStates % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + delete[] transPtrs; + return out; +} + + +std::ostream &TabCodeGen::TRANS_ACTIONS_WI() +{ + /* Transitions must be written ordered by their id. */ + RedTransAp **transPtrs = new RedTransAp*[redFsm->transSet.length()]; + for ( TransApSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) + transPtrs[trans->id] = trans; + + /* Keep a count of the num of items in the array written. */ + out << '\t'; + int totalAct = 0; + for ( int t = 0; t < redFsm->transSet.length(); t++ ) { + /* Write the function for the transition. 
*/ + RedTransAp *trans = transPtrs[t]; + TRANS_ACTION( trans ); + if ( t < redFsm->transSet.length()-1 ) { + out << ", "; + if ( ++totalAct % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + delete[] transPtrs; + return out; +} + +void TabCodeGen::LOCATE_TRANS() +{ + out << + " _keys = " << ARR_OFF( K(), KO() + "[" + CS() + "]" ) << ";\n" + " _trans = " << IO() << "[" << CS() << "];\n" + "\n" + " _klen = " << SL() << "[" << CS() << "];\n" + " if ( _klen > 0 ) {\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_lower = _keys;\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_mid;\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_upper = _keys + _klen - 1;\n" + " while (1) {\n" + " if ( _upper < _lower )\n" + " break;\n" + "\n" + " _mid = _lower + ((_upper-_lower) >> 1);\n" + " if ( " << GET_WIDE_KEY() << " < *_mid )\n" + " _upper = _mid - 1;\n" + " else if ( " << GET_WIDE_KEY() << " > *_mid )\n" + " _lower = _mid + 1;\n" + " else {\n" + " _trans += (_mid - _keys);\n" + " goto _match;\n" + " }\n" + " }\n" + " _keys += _klen;\n" + " _trans += _klen;\n" + " }\n" + "\n" + " _klen = " << RL() << "[" << CS() << "];\n" + " if ( _klen > 0 ) {\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_lower = _keys;\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_mid;\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_upper = _keys + (_klen<<1) - 2;\n" + " while (1) {\n" + " if ( _upper < _lower )\n" + " break;\n" + "\n" + " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n" + " if ( " << GET_WIDE_KEY() << " < _mid[0] )\n" + " _upper = _mid - 2;\n" + " else if ( " << GET_WIDE_KEY() << " > _mid[1] )\n" + " _lower = _mid + 2;\n" + " else {\n" + " _trans += ((_mid - _keys)>>1);\n" + " goto _match;\n" + " }\n" + " }\n" + " _trans += _klen;\n" + " }\n" + "\n"; +} + +void TabCodeGen::GOTO( ostream &ret, int gotoDest, bool inFinish ) +{ + ret << "{" << CS() << " = " << gotoDest << "; " << + CTRL_FLOW() << "goto 
_again;}"; +} + +void TabCodeGen::GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) +{ + ret << "{" << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish ); + ret << "); " << CTRL_FLOW() << "goto _again;}"; +} + +void TabCodeGen::CURS( ostream &ret, bool inFinish ) +{ + ret << "(_ps)"; +} + +void TabCodeGen::TARGS( ostream &ret, bool inFinish, int targState ) +{ + ret << "(" << CS() << ")"; +} + +void TabCodeGen::NEXT( ostream &ret, int nextDest, bool inFinish ) +{ + ret << CS() << " = " << nextDest << ";"; +} + +void TabCodeGen::NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ) +{ + ret << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, 0, inFinish ); + ret << ");"; +} + +void TabCodeGen::CALL( ostream &ret, int callDest, int targState, bool inFinish ) +{ + ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; " << CS() << " = " << + callDest << "; " << CTRL_FLOW() << "goto _again;}"; +} + +void TabCodeGen::CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish ) +{ + ret << "{" << STACK() << "[" << TOP() << "++] = " << CS() << "; " << CS() << " = ("; + INLINE_LIST( ret, ilItem->children, targState, inFinish ); + ret << "); " << CTRL_FLOW() << "goto _again;}"; +} + +void TabCodeGen::RET( ostream &ret, bool inFinish ) +{ + ret << "{" << CS() << " = " << STACK() << "[--" << + TOP() << "]; " << CTRL_FLOW() << "goto _again;}"; +} + +void TabCodeGen::BREAK( ostream &ret, int targState ) +{ + outLabelUsed = true; + ret << CTRL_FLOW() << "goto _out;"; +} + +void TabCodeGen::writeData() +{ + /* If there are any transtion functions then output the array. If there + * are none, don't bother emitting an empty array that won't be used. 
*/ + if ( redFsm->anyActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActArrItem), A() ); + ACTIONS_ARRAY(); + CLOSE_ARRAY() << + "\n"; + } + + if ( redFsm->anyConditions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondOffset), CO() ); + COND_OFFSETS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondLen), CL() ); + COND_LENS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( WIDE_ALPH_TYPE(), CK() ); + COND_KEYS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxCondSpaceId), C() ); + COND_SPACES(); + CLOSE_ARRAY() << + "\n"; + } + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxKeyOffset), KO() ); + KEY_OFFSETS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( WIDE_ALPH_TYPE(), K() ); + KEYS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxSingleLen), SL() ); + SINGLE_LENS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxRangeLen), RL() ); + RANGE_LENS(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxIndexOffset), IO() ); + INDEX_OFFSETS(); + CLOSE_ARRAY() << + "\n"; + + if ( useIndicies ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxIndex), I() ); + INDICIES(); + CLOSE_ARRAY() << + "\n"; + + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxState), TT() ); + TRANS_TARGS_WI(); + CLOSE_ARRAY() << + "\n"; + + if ( redFsm->anyActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TA() ); + TRANS_ACTIONS_WI(); + CLOSE_ARRAY() << + "\n"; + } + } + else { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxState), TT() ); + TRANS_TARGS(); + CLOSE_ARRAY() << + "\n"; + + if ( redFsm->anyActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TA() ); + TRANS_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + } + + if ( redFsm->anyToStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), TSA() ); + TO_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( redFsm->anyFromStateActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), FSA() ); + FROM_STATE_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + if ( 
redFsm->anyEofActions() ) { + OPEN_ARRAY( ARRAY_TYPE(redFsm->maxActionLoc), EA() ); + EOF_ACTIONS(); + CLOSE_ARRAY() << + "\n"; + } + + STATE_IDS(); +} + +void TabCodeGen::COND_TRANSLATE() +{ + out << + " _widec = " << GET_KEY() << ";\n" + " _klen = " << CL() << "[" << CS() << "];\n" + " _keys = " << ARR_OFF( CK(), "(" + CO() + "[" + CS() + "]*2)" ) << ";\n" + " if ( _klen > 0 ) {\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_lower = _keys;\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_mid;\n" + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_upper = _keys + (_klen<<1) - 2;\n" + " while (1) {\n" + " if ( _upper < _lower )\n" + " break;\n" + "\n" + " _mid = _lower + (((_upper-_lower) >> 1) & ~1);\n" + " if ( " << GET_WIDE_KEY() << " < _mid[0] )\n" + " _upper = _mid - 2;\n" + " else if ( " << GET_WIDE_KEY() << " > _mid[1] )\n" + " _lower = _mid + 2;\n" + " else {\n" + " switch ( " << C() << "[" << CO() << "[" << CS() << "]" + " + ((_mid - _keys)>>1)] ) {\n"; + + for ( CondSpaceList::Iter csi = condSpaceList; csi.lte(); csi++ ) { + CondSpace *condSpace = csi; + out << " case " << condSpace->condSpaceId << ": {\n"; + out << TABS(2) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(" << + KEY(condSpace->baseKey) << " + (" << GET_KEY() << + " - " << KEY(keyOps->minKey) << "));\n"; + + for ( CondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << TABS(2) << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = ((1 << csi.pos()) * keyOps->alphSize()); + out << " ) _widec += " << condValOffset << ";\n"; + } + + out << + " break;\n" + " }\n"; + } + + SWITCH_DEFAULT(); + + out << + " }\n" + " break;\n" + " }\n" + " }\n" + " }\n" + "\n"; +} + +void TabCodeGen::writeExec() +{ + outLabelUsed = false; + + out << + " {\n" + " int _klen"; + + if ( redFsm->anyRegCurStateRef() ) + out << ", _ps"; + + out << + ";\n" + " " << UINT() << " _trans;\n"; + + if ( redFsm->anyConditions() ) + out << " " << WIDE_ALPH_TYPE() << " 
_widec;\n"; + + if ( redFsm->anyToStateActions() || redFsm->anyRegActions() + || redFsm->anyFromStateActions() ) + { + out << + " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxActArrItem) << POINTER() << "_acts;\n" + " " << UINT() << " _nacts;\n"; + } + + out << + " " << PTR_CONST() << WIDE_ALPH_TYPE() << POINTER() << "_keys;\n" + "\n"; + + if ( hasEnd ) { + outLabelUsed = true; + out << + " if ( " << P() << " == " << PE() << " )\n" + " goto _out;\n"; + } + + out << "_resume:\n"; + + if ( redFsm->errState != 0 ) { + outLabelUsed = true; + out << + " if ( " << CS() << " == " << redFsm->errState->id << " )\n" + " goto _out;\n"; + } + + if ( redFsm->anyFromStateActions() ) { + out << + " _acts = " << ARR_OFF( A(), FSA() + "[" + CS() + "]" ) << ";\n" + " _nacts = " << CAST(UINT()) << " *_acts++;\n" + " while ( _nacts-- > 0 ) {\n" + " switch ( *_acts++ ) {\n"; + FROM_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + "\n"; + } + + if ( redFsm->anyConditions() ) + COND_TRANSLATE(); + + LOCATE_TRANS(); + + out << "_match:\n"; + + if ( redFsm->anyRegCurStateRef() ) + out << " _ps = " << CS() << ";\n"; + + if ( useIndicies ) + out << " _trans = " << I() << "[_trans];\n"; + + out << + " " << CS() << " = " << TT() << "[_trans];\n" + "\n"; + + if ( redFsm->anyRegActions() ) { + out << + " if ( " << TA() << "[_trans] == 0 )\n" + " goto _again;\n" + "\n" + " _acts = " << ARR_OFF( A(), TA() + "[_trans]" ) << ";\n" + " _nacts = " << CAST(UINT()) << " *_acts++;\n" + " while ( _nacts-- > 0 )\n {\n" + " switch ( *_acts++ )\n {\n"; + ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + "\n"; + } + + if ( redFsm->anyRegActions() || redFsm->anyActionGotos() || + redFsm->anyActionCalls() || redFsm->anyActionRets() ) + out << "_again:\n"; + + if ( redFsm->anyToStateActions() ) { + out << + " _acts = " << ARR_OFF( A(), TSA() + "[" + CS() + "]" ) << ";\n" + " _nacts = " << CAST(UINT()) << " *_acts++;\n" + " while ( _nacts-- > 0 ) {\n" + " switch ( *_acts++ ) {\n"; + 
TO_STATE_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + "\n"; + } + + if ( hasEnd ) { + out << + " if ( ++" << P() << " != " << PE() << " )\n" + " goto _resume;\n"; + } + else { + out << + " " << P() << " += 1;\n" + " goto _resume;\n"; + } + + if ( outLabelUsed ) + out << " _out: {}\n"; + + out << " }\n"; +} + + +void TabCodeGen::writeEOF() +{ + if ( redFsm->anyEofActions() ) { + out << + " {\n" + " " << PTR_CONST() << ARRAY_TYPE(redFsm->maxActArrItem) << POINTER() << "_acts = " << + ARR_OFF( A(), EA() + "[" + CS() + "]" ) << ";\n" + " " << UINT() << " _nacts = " << CAST(UINT()) << " *_acts++;\n" + " while ( _nacts-- > 0 ) {\n" + " switch ( *_acts++ ) {\n"; + EOF_ACTION_SWITCH(); + SWITCH_DEFAULT() << + " }\n" + " }\n" + " }\n" + "\n"; + } +} diff --git a/contrib/tools/ragel5/rlgen-cd/tabcodegen.h b/contrib/tools/ragel5/rlgen-cd/tabcodegen.h new file mode 100644 index 0000000000..745eb18d81 --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/tabcodegen.h @@ -0,0 +1,115 @@ +/* + * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca> + * 2004 Erich Ocean <eric.ocean@ampede.com> + * 2005 Alan West <alan@alanz.com> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _TABCODEGEN_H +#define _TABCODEGEN_H + +#include <iostream> +#include "fsmcodegen.h" + +/* Forwards. */ +struct CodeGenData; +struct NameInst; +struct RedTransAp; +struct RedStateAp; + +/* + * TabCodeGen + */ +class TabCodeGen : virtual public FsmCodeGen +{ +public: + TabCodeGen( ostream &out ) : FsmCodeGen(out) {} + virtual ~TabCodeGen() { } + virtual void writeData(); + virtual void writeExec(); + +protected: + std::ostream &TO_STATE_ACTION_SWITCH(); + std::ostream &FROM_STATE_ACTION_SWITCH(); + std::ostream &EOF_ACTION_SWITCH(); + std::ostream &ACTION_SWITCH(); + + std::ostream &COND_KEYS(); + std::ostream &COND_SPACES(); + std::ostream &KEYS(); + std::ostream &INDICIES(); + std::ostream &COND_OFFSETS(); + std::ostream &KEY_OFFSETS(); + std::ostream &INDEX_OFFSETS(); + std::ostream &COND_LENS(); + std::ostream &SINGLE_LENS(); + std::ostream &RANGE_LENS(); + std::ostream &TO_STATE_ACTIONS(); + std::ostream &FROM_STATE_ACTIONS(); + std::ostream &EOF_ACTIONS(); + std::ostream &TRANS_TARGS(); + std::ostream &TRANS_ACTIONS(); + std::ostream &TRANS_TARGS_WI(); + std::ostream &TRANS_ACTIONS_WI(); + void LOCATE_TRANS(); + + void COND_TRANSLATE(); + + void GOTO( ostream &ret, int gotoDest, bool inFinish ); + void CALL( ostream &ret, int callDest, int targState, bool inFinish ); + void NEXT( ostream &ret, int nextDest, bool inFinish ); + void GOTO_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ); + void NEXT_EXPR( ostream &ret, InlineItem *ilItem, bool inFinish ); + void CALL_EXPR( ostream &ret, InlineItem *ilItem, int targState, bool inFinish ); + void CURS( ostream &ret, bool inFinish ); + void TARGS( ostream &ret, bool inFinish, int targState ); + void RET( ostream &ret, bool inFinish ); + void BREAK( ostream &ret, int targState ); + + virtual 
std::ostream &TO_STATE_ACTION( RedStateAp *state ); + virtual std::ostream &FROM_STATE_ACTION( RedStateAp *state ); + virtual std::ostream &EOF_ACTION( RedStateAp *state ); + virtual std::ostream &TRANS_ACTION( RedTransAp *trans ); + virtual void calcIndexSize(); + virtual void writeEOF(); +}; + + +/* + * CTabCodeGen + */ +struct CTabCodeGen + : public TabCodeGen, public CCodeGen +{ + CTabCodeGen( ostream &out ) : + FsmCodeGen(out), TabCodeGen(out), CCodeGen(out) {} +}; + +/* + * DTabCodeGen + */ +struct DTabCodeGen + : public TabCodeGen, public DCodeGen +{ + DTabCodeGen( ostream &out ) : + FsmCodeGen(out), TabCodeGen(out), DCodeGen(out) {} +}; + + +#endif /* _TABCODEGEN_H */ diff --git a/contrib/tools/ragel5/rlgen-cd/ya.make b/contrib/tools/ragel5/rlgen-cd/ya.make new file mode 100644 index 0000000000..ef2a59f8c2 --- /dev/null +++ b/contrib/tools/ragel5/rlgen-cd/ya.make @@ -0,0 +1,25 @@ +PROGRAM() + +NO_UTIL() +NO_COMPILER_WARNINGS() + +PEERDIR( + contrib/tools/ragel5/aapl + contrib/tools/ragel5/common + contrib/tools/ragel5/redfsm +) + +SRCS( + fflatcodegen.cpp + fgotocodegen.cpp + flatcodegen.cpp + fsmcodegen.cpp + ftabcodegen.cpp + gotocodegen.cpp + ipgotocodegen.cpp + main.cpp + splitcodegen.cpp + tabcodegen.cpp +) + +END() diff --git a/geobase/CMakeLists.txt b/geobase/CMakeLists.txt new file mode 100644 index 0000000000..164af3f4bc --- /dev/null +++ b/geobase/CMakeLists.txt @@ -0,0 +1,10 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(library) +add_subdirectory(user-settings) diff --git a/geobase/library/CMakeLists.darwin-x86_64.txt b/geobase/library/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..7257bb9ee1 --- /dev/null +++ b/geobase/library/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,22 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(abi) +add_subdirectory(api) +add_subdirectory(city_id_calc) +add_subdirectory(db) +add_subdirectory(dispute_regs) +add_subdirectory(utils) + +add_library(geobase-library INTERFACE) +target_link_libraries(geobase-library INTERFACE + contrib-libs-cxxsupp + yutil + geobase-library-api + library-db-stub +) diff --git a/geobase/library/CMakeLists.linux-aarch64.txt b/geobase/library/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..8a01a727c4 --- /dev/null +++ b/geobase/library/CMakeLists.linux-aarch64.txt @@ -0,0 +1,23 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(abi) +add_subdirectory(api) +add_subdirectory(city_id_calc) +add_subdirectory(db) +add_subdirectory(dispute_regs) +add_subdirectory(utils) + +add_library(geobase-library INTERFACE) +target_link_libraries(geobase-library INTERFACE + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + geobase-library-api + library-db-stub +) diff --git a/geobase/library/CMakeLists.linux-x86_64.txt b/geobase/library/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..8a01a727c4 --- /dev/null +++ b/geobase/library/CMakeLists.linux-x86_64.txt @@ -0,0 +1,23 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(abi) +add_subdirectory(api) +add_subdirectory(city_id_calc) +add_subdirectory(db) +add_subdirectory(dispute_regs) +add_subdirectory(utils) + +add_library(geobase-library INTERFACE) +target_link_libraries(geobase-library INTERFACE + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + geobase-library-api + library-db-stub +) diff --git a/geobase/library/CMakeLists.txt b/geobase/library/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/geobase/library/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/geobase/library/CMakeLists.windows-x86_64.txt b/geobase/library/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..7257bb9ee1 --- /dev/null +++ b/geobase/library/CMakeLists.windows-x86_64.txt @@ -0,0 +1,22 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(abi) +add_subdirectory(api) +add_subdirectory(city_id_calc) +add_subdirectory(db) +add_subdirectory(dispute_regs) +add_subdirectory(utils) + +add_library(geobase-library INTERFACE) +target_link_libraries(geobase-library INTERFACE + contrib-libs-cxxsupp + yutil + geobase-library-api + library-db-stub +) diff --git a/geobase/library/abi/CMakeLists.darwin-x86_64.txt b/geobase/library/abi/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..9c18e158b2 --- /dev/null +++ b/geobase/library/abi/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,56 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(geobase-library-abi) +target_compile_options(geobase-library-abi PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_include_directories(geobase-library-abi PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/user-settings/include +) +target_link_libraries(geobase-library-abi PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-cctz + geobase-library-city_id_calc + geobase-library-dispute_regs + geobase-library-utils + geobase-user-settings + library-cpp-geohash + library-cpp-json + cpp-reverse_geocoder-core +) +target_sources(geobase-library-abi PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/asset.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/as_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/binary_reader.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/binary_format_internals.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/borders_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_isp_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_traits_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/isp_names_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/fake_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/region_type.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/regions_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/region_types_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/region_fields_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/regions_locale_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/resource_base.cpp + 
${CMAKE_SOURCE_DIR}/geobase/library/structs_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/tor_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/v4.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/v6.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/vp_point_distance.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/vp_tree.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/vp_tree_search.cpp +) diff --git a/geobase/library/abi/CMakeLists.linux-aarch64.txt b/geobase/library/abi/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..e5370f291c --- /dev/null +++ b/geobase/library/abi/CMakeLists.linux-aarch64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(geobase-library-abi) +target_compile_options(geobase-library-abi PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_include_directories(geobase-library-abi PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/user-settings/include +) +target_link_libraries(geobase-library-abi PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-cctz + geobase-library-city_id_calc + geobase-library-dispute_regs + geobase-library-utils + geobase-user-settings + library-cpp-geohash + library-cpp-json + cpp-reverse_geocoder-core +) +target_sources(geobase-library-abi PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/asset.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/as_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/binary_reader.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/binary_format_internals.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/borders_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_isp_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_traits_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/isp_names_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/fake_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/region_type.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/regions_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/region_types_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/region_fields_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/regions_locale_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/resource_base.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/structs_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/tor_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/v4.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/v6.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/vp_point_distance.cpp + 
${CMAKE_SOURCE_DIR}/geobase/library/vp_tree.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/vp_tree_search.cpp +) diff --git a/geobase/library/abi/CMakeLists.linux-x86_64.txt b/geobase/library/abi/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..e5370f291c --- /dev/null +++ b/geobase/library/abi/CMakeLists.linux-x86_64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(geobase-library-abi) +target_compile_options(geobase-library-abi PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_include_directories(geobase-library-abi PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/user-settings/include +) +target_link_libraries(geobase-library-abi PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-cctz + geobase-library-city_id_calc + geobase-library-dispute_regs + geobase-library-utils + geobase-user-settings + library-cpp-geohash + library-cpp-json + cpp-reverse_geocoder-core +) +target_sources(geobase-library-abi PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/asset.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/as_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/binary_reader.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/binary_format_internals.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/borders_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_isp_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_traits_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/isp_names_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/fake_resource.cpp + 
${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/region_type.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/regions_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/region_types_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/region_fields_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/regions_locale_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/resource_base.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/structs_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/tor_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/v4.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/v6.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/vp_point_distance.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/vp_tree.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/vp_tree_search.cpp +) diff --git a/geobase/library/abi/CMakeLists.txt b/geobase/library/abi/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/geobase/library/abi/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/geobase/library/abi/CMakeLists.windows-x86_64.txt b/geobase/library/abi/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..9c18e158b2 --- /dev/null +++ b/geobase/library/abi/CMakeLists.windows-x86_64.txt @@ -0,0 +1,56 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(geobase-library-abi) +target_compile_options(geobase-library-abi PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_include_directories(geobase-library-abi PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/user-settings/include +) +target_link_libraries(geobase-library-abi PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-cctz + geobase-library-city_id_calc + geobase-library-dispute_regs + geobase-library-utils + geobase-user-settings + library-cpp-geohash + library-cpp-json + cpp-reverse_geocoder-core +) +target_sources(geobase-library-abi PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/asset.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/as_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/binary_reader.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/binary_format_internals.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/borders_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_isp_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/ipreg_traits_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/isp_names_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/fake_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/lookup_impl_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/region_type.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/regions_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/region_types_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/region_fields_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/regions_locale_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/resource_base.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/structs_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter_impl.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/tor_resource.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/v4.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/v6.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/vp_point_distance.cpp + 
${CMAKE_SOURCE_DIR}/geobase/library/vp_tree.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/vp_tree_search.cpp +) diff --git a/geobase/library/api/CMakeLists.darwin-x86_64.txt b/geobase/library/api/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..e7d1812621 --- /dev/null +++ b/geobase/library/api/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,25 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(geobase-library-api) +target_compile_options(geobase-library-api PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(geobase-library-api PUBLIC + contrib-libs-cxxsupp + yutil + geobase-library-abi +) +target_sources(geobase-library-api PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/lookup.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/lookup_wrapper.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/service_getter.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/known_tz_list.cpp +) diff --git a/geobase/library/api/CMakeLists.linux-aarch64.txt b/geobase/library/api/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..40ecd7c759 --- /dev/null +++ b/geobase/library/api/CMakeLists.linux-aarch64.txt @@ -0,0 +1,26 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. 
Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(geobase-library-api) +target_compile_options(geobase-library-api PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(geobase-library-api PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + geobase-library-abi +) +target_sources(geobase-library-api PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/lookup.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/lookup_wrapper.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/service_getter.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/known_tz_list.cpp +) diff --git a/geobase/library/api/CMakeLists.linux-x86_64.txt b/geobase/library/api/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..40ecd7c759 --- /dev/null +++ b/geobase/library/api/CMakeLists.linux-x86_64.txt @@ -0,0 +1,26 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(geobase-library-api) +target_compile_options(geobase-library-api PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(geobase-library-api PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + geobase-library-abi +) +target_sources(geobase-library-api PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/lookup.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/lookup_wrapper.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/service_getter.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/known_tz_list.cpp +) diff --git a/geobase/library/api/CMakeLists.txt b/geobase/library/api/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/geobase/library/api/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/geobase/library/api/CMakeLists.windows-x86_64.txt b/geobase/library/api/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..e7d1812621 --- /dev/null +++ b/geobase/library/api/CMakeLists.windows-x86_64.txt @@ -0,0 +1,25 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(geobase-library-api) +target_compile_options(geobase-library-api PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(geobase-library-api PUBLIC + contrib-libs-cxxsupp + yutil + geobase-library-abi +) +target_sources(geobase-library-api PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/lookup.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/lookup_wrapper.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/service_getter.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/timezone_getter.cpp + ${CMAKE_SOURCE_DIR}/geobase/library/known_tz_list.cpp +) diff --git a/geobase/library/city_id_calc/CMakeLists.darwin-x86_64.txt b/geobase/library/city_id_calc/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..5859e8c377 --- /dev/null +++ b/geobase/library/city_id_calc/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(geobase-library-city_id_calc) +target_compile_options(geobase-library-city_id_calc PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(geobase-library-city_id_calc PUBLIC + contrib-libs-cxxsupp + yutil +) +target_sources(geobase-library-city_id_calc PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/city_id_calc/city_id_base.cpp +) diff --git a/geobase/library/city_id_calc/CMakeLists.linux-aarch64.txt b/geobase/library/city_id_calc/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..799c117c73 --- /dev/null +++ b/geobase/library/city_id_calc/CMakeLists.linux-aarch64.txt @@ -0,0 +1,21 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(geobase-library-city_id_calc) +target_compile_options(geobase-library-city_id_calc PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(geobase-library-city_id_calc PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil +) +target_sources(geobase-library-city_id_calc PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/city_id_calc/city_id_base.cpp +) diff --git a/geobase/library/city_id_calc/CMakeLists.linux-x86_64.txt b/geobase/library/city_id_calc/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..799c117c73 --- /dev/null +++ b/geobase/library/city_id_calc/CMakeLists.linux-x86_64.txt @@ -0,0 +1,21 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(geobase-library-city_id_calc) +target_compile_options(geobase-library-city_id_calc PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(geobase-library-city_id_calc PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil +) +target_sources(geobase-library-city_id_calc PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/city_id_calc/city_id_base.cpp +) diff --git a/geobase/library/city_id_calc/CMakeLists.txt b/geobase/library/city_id_calc/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/geobase/library/city_id_calc/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/geobase/library/city_id_calc/CMakeLists.windows-x86_64.txt b/geobase/library/city_id_calc/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..5859e8c377 --- /dev/null +++ b/geobase/library/city_id_calc/CMakeLists.windows-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(geobase-library-city_id_calc) +target_compile_options(geobase-library-city_id_calc PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(geobase-library-city_id_calc PUBLIC + contrib-libs-cxxsupp + yutil +) +target_sources(geobase-library-city_id_calc PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/city_id_calc/city_id_base.cpp +) diff --git a/geobase/library/db/CMakeLists.txt b/geobase/library/db/CMakeLists.txt new file mode 100644 index 0000000000..70fc7a172d --- /dev/null +++ b/geobase/library/db/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(stub) diff --git a/geobase/library/db/stub/CMakeLists.darwin-x86_64.txt b/geobase/library/db/stub/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..4960807960 --- /dev/null +++ b/geobase/library/db/stub/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,21 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(library-db-stub) +target_compile_options(library-db-stub PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(library-db-stub PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_sources(library-db-stub PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/db/stub/db_resources_internals_stub.cpp +) diff --git a/geobase/library/db/stub/CMakeLists.linux-aarch64.txt b/geobase/library/db/stub/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..4c10cc50ef --- /dev/null +++ b/geobase/library/db/stub/CMakeLists.linux-aarch64.txt @@ -0,0 +1,22 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. 
Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(library-db-stub) +target_compile_options(library-db-stub PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(library-db-stub PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_sources(library-db-stub PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/db/stub/db_resources_internals_stub.cpp +) diff --git a/geobase/library/db/stub/CMakeLists.linux-x86_64.txt b/geobase/library/db/stub/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..4c10cc50ef --- /dev/null +++ b/geobase/library/db/stub/CMakeLists.linux-x86_64.txt @@ -0,0 +1,22 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(library-db-stub) +target_compile_options(library-db-stub PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(library-db-stub PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_sources(library-db-stub PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/db/stub/db_resources_internals_stub.cpp +) diff --git a/geobase/library/db/stub/CMakeLists.txt b/geobase/library/db/stub/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/geobase/library/db/stub/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/geobase/library/db/stub/CMakeLists.windows-x86_64.txt b/geobase/library/db/stub/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..4960807960 --- /dev/null +++ b/geobase/library/db/stub/CMakeLists.windows-x86_64.txt @@ -0,0 +1,21 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(library-db-stub) +target_compile_options(library-db-stub PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(library-db-stub PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_sources(library-db-stub PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/db/stub/db_resources_internals_stub.cpp +) diff --git a/geobase/library/dispute_regs/CMakeLists.darwin-x86_64.txt b/geobase/library/dispute_regs/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..30cef124f7 --- /dev/null +++ b/geobase/library/dispute_regs/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,24 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(proto) +add_subdirectory(resource) + +add_library(geobase-library-dispute_regs) +target_link_libraries(geobase-library-dispute_regs PUBLIC + contrib-libs-cxxsupp + yutil + library-dispute_regs-proto + library-dispute_regs-resource + cpp-protobuf-json + library-cpp-json + library-cpp-resource +) +target_sources(geobase-library-dispute_regs PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/config.cpp +) diff --git a/geobase/library/dispute_regs/CMakeLists.linux-aarch64.txt b/geobase/library/dispute_regs/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..769eac8567 --- /dev/null +++ b/geobase/library/dispute_regs/CMakeLists.linux-aarch64.txt @@ -0,0 +1,25 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(proto) +add_subdirectory(resource) + +add_library(geobase-library-dispute_regs) +target_link_libraries(geobase-library-dispute_regs PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-dispute_regs-proto + library-dispute_regs-resource + cpp-protobuf-json + library-cpp-json + library-cpp-resource +) +target_sources(geobase-library-dispute_regs PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/config.cpp +) diff --git a/geobase/library/dispute_regs/CMakeLists.linux-x86_64.txt b/geobase/library/dispute_regs/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..769eac8567 --- /dev/null +++ b/geobase/library/dispute_regs/CMakeLists.linux-x86_64.txt @@ -0,0 +1,25 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(proto) +add_subdirectory(resource) + +add_library(geobase-library-dispute_regs) +target_link_libraries(geobase-library-dispute_regs PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-dispute_regs-proto + library-dispute_regs-resource + cpp-protobuf-json + library-cpp-json + library-cpp-resource +) +target_sources(geobase-library-dispute_regs PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/config.cpp +) diff --git a/geobase/library/dispute_regs/CMakeLists.txt b/geobase/library/dispute_regs/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/geobase/library/dispute_regs/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/geobase/library/dispute_regs/CMakeLists.windows-x86_64.txt b/geobase/library/dispute_regs/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..30cef124f7 --- /dev/null +++ b/geobase/library/dispute_regs/CMakeLists.windows-x86_64.txt @@ -0,0 +1,24 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(proto) +add_subdirectory(resource) + +add_library(geobase-library-dispute_regs) +target_link_libraries(geobase-library-dispute_regs PUBLIC + contrib-libs-cxxsupp + yutil + library-dispute_regs-proto + library-dispute_regs-resource + cpp-protobuf-json + library-cpp-json + library-cpp-resource +) +target_sources(geobase-library-dispute_regs PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/config.cpp +) diff --git a/geobase/library/dispute_regs/proto/CMakeLists.darwin-x86_64.txt b/geobase/library/dispute_regs/proto/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..d2fb124680 --- /dev/null +++ b/geobase/library/dispute_regs/proto/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,43 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(library-dispute_regs-proto) +target_link_libraries(library-dispute_regs-proto PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(library-dispute_regs-proto PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/proto/config.proto +) +target_proto_addincls(library-dispute_regs-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(library-dispute_regs-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/geobase/library/dispute_regs/proto/CMakeLists.linux-aarch64.txt b/geobase/library/dispute_regs/proto/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..998b76e8c7 --- /dev/null +++ b/geobase/library/dispute_regs/proto/CMakeLists.linux-aarch64.txt @@ -0,0 +1,44 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(library-dispute_regs-proto) +target_link_libraries(library-dispute_regs-proto PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(library-dispute_regs-proto PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/proto/config.proto +) +target_proto_addincls(library-dispute_regs-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(library-dispute_regs-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/geobase/library/dispute_regs/proto/CMakeLists.linux-x86_64.txt b/geobase/library/dispute_regs/proto/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..998b76e8c7 --- /dev/null +++ b/geobase/library/dispute_regs/proto/CMakeLists.linux-x86_64.txt @@ -0,0 +1,44 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(library-dispute_regs-proto) +target_link_libraries(library-dispute_regs-proto PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(library-dispute_regs-proto PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/proto/config.proto +) +target_proto_addincls(library-dispute_regs-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(library-dispute_regs-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/geobase/library/dispute_regs/proto/CMakeLists.txt b/geobase/library/dispute_regs/proto/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/geobase/library/dispute_regs/proto/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/geobase/library/dispute_regs/proto/CMakeLists.windows-x86_64.txt b/geobase/library/dispute_regs/proto/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..d2fb124680 --- /dev/null +++ b/geobase/library/dispute_regs/proto/CMakeLists.windows-x86_64.txt @@ -0,0 +1,43 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(library-dispute_regs-proto) +target_link_libraries(library-dispute_regs-proto PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(library-dispute_regs-proto PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/proto/config.proto +) +target_proto_addincls(library-dispute_regs-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(library-dispute_regs-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/geobase/library/dispute_regs/resource/CMakeLists.darwin-x86_64.txt b/geobase/library/dispute_regs/resource/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..76aa5d01ef --- /dev/null +++ b/geobase/library/dispute_regs/resource/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,38 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_rescompiler_bin + TOOL_rescompiler_dependency + tools/rescompiler/bin + rescompiler +) + +add_library(library-dispute_regs-resource INTERFACE) +target_link_libraries(library-dispute_regs-resource INTERFACE + contrib-libs-cxxsupp + yutil + library-cpp-resource +) + +add_global_library_for(library-dispute_regs-resource.global library-dispute_regs-resource) +target_link_libraries(library-dispute_regs-resource.global PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-resource +) +target_sources(library-dispute_regs-resource.global PRIVATE + ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp +) +resources(library-dispute_regs-resource.global + ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp + INPUTS + ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/resource/config.json + KEYS + /geobase/dispute-config +) diff --git a/geobase/library/dispute_regs/resource/CMakeLists.linux-aarch64.txt b/geobase/library/dispute_regs/resource/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..7587a04962 --- /dev/null +++ b/geobase/library/dispute_regs/resource/CMakeLists.linux-aarch64.txt @@ -0,0 +1,40 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_rescompiler_bin + TOOL_rescompiler_dependency + tools/rescompiler/bin + rescompiler +) + +add_library(library-dispute_regs-resource INTERFACE) +target_link_libraries(library-dispute_regs-resource INTERFACE + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-resource +) + +add_global_library_for(library-dispute_regs-resource.global library-dispute_regs-resource) +target_link_libraries(library-dispute_regs-resource.global PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-resource +) +target_sources(library-dispute_regs-resource.global PRIVATE + ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp +) +resources(library-dispute_regs-resource.global + ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp + INPUTS + ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/resource/config.json + KEYS + /geobase/dispute-config +) diff --git a/geobase/library/dispute_regs/resource/CMakeLists.linux-x86_64.txt b/geobase/library/dispute_regs/resource/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..7587a04962 --- /dev/null +++ b/geobase/library/dispute_regs/resource/CMakeLists.linux-x86_64.txt @@ -0,0 +1,40 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_rescompiler_bin + TOOL_rescompiler_dependency + tools/rescompiler/bin + rescompiler +) + +add_library(library-dispute_regs-resource INTERFACE) +target_link_libraries(library-dispute_regs-resource INTERFACE + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-resource +) + +add_global_library_for(library-dispute_regs-resource.global library-dispute_regs-resource) +target_link_libraries(library-dispute_regs-resource.global PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-resource +) +target_sources(library-dispute_regs-resource.global PRIVATE + ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp +) +resources(library-dispute_regs-resource.global + ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp + INPUTS + ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/resource/config.json + KEYS + /geobase/dispute-config +) diff --git a/geobase/library/dispute_regs/resource/CMakeLists.txt b/geobase/library/dispute_regs/resource/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/geobase/library/dispute_regs/resource/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/geobase/library/dispute_regs/resource/CMakeLists.windows-x86_64.txt b/geobase/library/dispute_regs/resource/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..76aa5d01ef --- /dev/null +++ b/geobase/library/dispute_regs/resource/CMakeLists.windows-x86_64.txt @@ -0,0 +1,38 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_rescompiler_bin + TOOL_rescompiler_dependency + tools/rescompiler/bin + rescompiler +) + +add_library(library-dispute_regs-resource INTERFACE) +target_link_libraries(library-dispute_regs-resource INTERFACE + contrib-libs-cxxsupp + yutil + library-cpp-resource +) + +add_global_library_for(library-dispute_regs-resource.global library-dispute_regs-resource) +target_link_libraries(library-dispute_regs-resource.global PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-resource +) +target_sources(library-dispute_regs-resource.global PRIVATE + ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp +) +resources(library-dispute_regs-resource.global + ${CMAKE_BINARY_DIR}/geobase/library/dispute_regs/resource/06b2d3b23dce96e1619d2b53d6c947ec.cpp + INPUTS + ${CMAKE_SOURCE_DIR}/geobase/library/dispute_regs/resource/config.json + KEYS + /geobase/dispute-config +) diff --git a/geobase/library/utils/CMakeLists.darwin-x86_64.txt b/geobase/library/utils/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..41b801b27f --- /dev/null +++ b/geobase/library/utils/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,23 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +find_package(OpenSSL REQUIRED) + +add_library(geobase-library-utils) +target_compile_options(geobase-library-utils PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(geobase-library-utils PUBLIC + contrib-libs-cxxsupp + yutil + OpenSSL::OpenSSL + contrib-libs-protobuf +) +target_sources(geobase-library-utils PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/utils/builder_utils.cpp +) diff --git a/geobase/library/utils/CMakeLists.linux-aarch64.txt b/geobase/library/utils/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..b6478526df --- /dev/null +++ b/geobase/library/utils/CMakeLists.linux-aarch64.txt @@ -0,0 +1,24 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +find_package(OpenSSL REQUIRED) + +add_library(geobase-library-utils) +target_compile_options(geobase-library-utils PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(geobase-library-utils PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + OpenSSL::OpenSSL + contrib-libs-protobuf +) +target_sources(geobase-library-utils PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/utils/builder_utils.cpp +) diff --git a/geobase/library/utils/CMakeLists.linux-x86_64.txt b/geobase/library/utils/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..b6478526df --- /dev/null +++ b/geobase/library/utils/CMakeLists.linux-x86_64.txt @@ -0,0 +1,24 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +find_package(OpenSSL REQUIRED) + +add_library(geobase-library-utils) +target_compile_options(geobase-library-utils PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(geobase-library-utils PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + OpenSSL::OpenSSL + contrib-libs-protobuf +) +target_sources(geobase-library-utils PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/utils/builder_utils.cpp +) diff --git a/geobase/library/utils/CMakeLists.txt b/geobase/library/utils/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/geobase/library/utils/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/geobase/library/utils/CMakeLists.windows-x86_64.txt b/geobase/library/utils/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..41b801b27f --- /dev/null +++ b/geobase/library/utils/CMakeLists.windows-x86_64.txt @@ -0,0 +1,23 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +find_package(OpenSSL REQUIRED) + +add_library(geobase-library-utils) +target_compile_options(geobase-library-utils PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_link_libraries(geobase-library-utils PUBLIC + contrib-libs-cxxsupp + yutil + OpenSSL::OpenSSL + contrib-libs-protobuf +) +target_sources(geobase-library-utils PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/library/utils/builder_utils.cpp +) diff --git a/geobase/user-settings/CMakeLists.darwin-x86_64.txt b/geobase/user-settings/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..ebb26215eb --- /dev/null +++ b/geobase/user-settings/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,31 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(geobase-user-settings) +target_compile_options(geobase-user-settings PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_include_directories(geobase-user-settings PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/user-settings/include +) +target_link_libraries(geobase-user-settings PUBLIC + contrib-libs-cxxsupp + yutil + cpp-string_utils-base64 +) +target_sources(geobase-user-settings PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/geo_point.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gp_container.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gpauto_container.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/util.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/y_cookie.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ygo_container.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/yp_cookie.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ys_cookie.cpp +) diff --git a/geobase/user-settings/CMakeLists.linux-aarch64.txt b/geobase/user-settings/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..5a32556987 --- /dev/null +++ b/geobase/user-settings/CMakeLists.linux-aarch64.txt @@ -0,0 +1,32 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(geobase-user-settings) +target_compile_options(geobase-user-settings PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_include_directories(geobase-user-settings PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/user-settings/include +) +target_link_libraries(geobase-user-settings PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-string_utils-base64 +) +target_sources(geobase-user-settings PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/geo_point.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gp_container.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gpauto_container.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/util.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/y_cookie.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ygo_container.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/yp_cookie.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ys_cookie.cpp +) diff --git a/geobase/user-settings/CMakeLists.linux-x86_64.txt b/geobase/user-settings/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..5a32556987 --- /dev/null +++ b/geobase/user-settings/CMakeLists.linux-x86_64.txt @@ -0,0 +1,32 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(geobase-user-settings) +target_compile_options(geobase-user-settings PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_include_directories(geobase-user-settings PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/user-settings/include +) +target_link_libraries(geobase-user-settings PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-string_utils-base64 +) +target_sources(geobase-user-settings PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/geo_point.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gp_container.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gpauto_container.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/util.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/y_cookie.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ygo_container.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/yp_cookie.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ys_cookie.cpp +) diff --git a/geobase/user-settings/CMakeLists.txt b/geobase/user-settings/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/geobase/user-settings/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/geobase/user-settings/CMakeLists.windows-x86_64.txt b/geobase/user-settings/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..ebb26215eb --- /dev/null +++ b/geobase/user-settings/CMakeLists.windows-x86_64.txt @@ -0,0 +1,31 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(geobase-user-settings) +target_compile_options(geobase-user-settings PRIVATE + $<IF:$<CXX_COMPILER_ID:MSVC>,,-Wno-everything> +) +target_include_directories(geobase-user-settings PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/user-settings/include +) +target_link_libraries(geobase-user-settings PUBLIC + contrib-libs-cxxsupp + yutil + cpp-string_utils-base64 +) +target_sources(geobase-user-settings PRIVATE + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/geo_point.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gp_container.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/gpauto_container.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/util.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/y_cookie.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ygo_container.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/yp_cookie.cpp + ${CMAKE_SOURCE_DIR}/geobase/user-settings/library/ys_cookie.cpp +) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt new file mode 100644 index 0000000000..090bc525e7 --- /dev/null +++ b/kernel/CMakeLists.txt @@ -0,0 +1,16 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(blogs) +add_subdirectory(hosts) +add_subdirectory(indexann) +add_subdirectory(langregion) +add_subdirectory(mango) +add_subdirectory(multilanguage_hosts) +add_subdirectory(search_zone) +add_subdirectory(urlnorm) diff --git a/kernel/blogs/CMakeLists.txt b/kernel/blogs/CMakeLists.txt new file mode 100644 index 0000000000..6d580ae9ad --- /dev/null +++ b/kernel/blogs/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(protos) diff --git a/kernel/blogs/protos/CMakeLists.darwin-x86_64.txt b/kernel/blogs/protos/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..8b38d6e6de --- /dev/null +++ b/kernel/blogs/protos/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,56 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-blogs-protos) +target_link_libraries(kernel-blogs-protos PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(kernel-blogs-protos PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/blogs.proto + ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/rss.proto +) +target_proto_addincls(kernel-blogs-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-blogs-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/kernel/blogs/protos/CMakeLists.linux-aarch64.txt b/kernel/blogs/protos/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..c60cbb659f --- /dev/null +++ b/kernel/blogs/protos/CMakeLists.linux-aarch64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-blogs-protos) +target_link_libraries(kernel-blogs-protos PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(kernel-blogs-protos PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/blogs.proto + ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/rss.proto +) +target_proto_addincls(kernel-blogs-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-blogs-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/kernel/blogs/protos/CMakeLists.linux-x86_64.txt b/kernel/blogs/protos/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..c60cbb659f --- /dev/null +++ b/kernel/blogs/protos/CMakeLists.linux-x86_64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-blogs-protos) +target_link_libraries(kernel-blogs-protos PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(kernel-blogs-protos PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/blogs.proto + ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/rss.proto +) +target_proto_addincls(kernel-blogs-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-blogs-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/kernel/blogs/protos/CMakeLists.txt b/kernel/blogs/protos/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/kernel/blogs/protos/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/kernel/blogs/protos/CMakeLists.windows-x86_64.txt b/kernel/blogs/protos/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..8b38d6e6de --- /dev/null +++ b/kernel/blogs/protos/CMakeLists.windows-x86_64.txt @@ -0,0 +1,56 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-blogs-protos) +target_link_libraries(kernel-blogs-protos PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(kernel-blogs-protos PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/blogs.proto + ${CMAKE_SOURCE_DIR}/kernel/blogs/protos/rss.proto +) +target_proto_addincls(kernel-blogs-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-blogs-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/kernel/hosts/CMakeLists.txt b/kernel/hosts/CMakeLists.txt new file mode 100644 index 0000000000..516c4594e0 --- /dev/null +++ b/kernel/hosts/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(owner) diff --git a/kernel/hosts/owner/CMakeLists.darwin-x86_64.txt b/kernel/hosts/owner/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..cb414f86b7 --- /dev/null +++ b/kernel/hosts/owner/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,53 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_archiver_bin + TOOL_archiver_dependency + tools/archiver + archiver +) + +add_library(kernel-hosts-owner) +target_include_directories(kernel-hosts-owner PUBLIC + ${CMAKE_BINARY_DIR}/kernel/hosts/owner +) +target_link_libraries(kernel-hosts-owner PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-archive + cpp-containers-str_hash + cpp-string_utils-url +) +target_sources(kernel-hosts-owner PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/hosts/owner/owner.cpp + ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc +) +add_custom_command( + OUTPUT + ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc + DEPENDS + ${TOOL_archiver_bin} + ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst + ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list + ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list + COMMAND + ${TOOL_archiver_bin} + -q + -x + ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst: + ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list: + ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list: + -o + ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc +) +if(NOT CMAKE_CROSSCOMPILING) + add_dependencies(kernel-hosts-owner + archiver +) +endif() diff --git a/kernel/hosts/owner/CMakeLists.linux-aarch64.txt b/kernel/hosts/owner/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..7421b7739c --- 
/dev/null +++ b/kernel/hosts/owner/CMakeLists.linux-aarch64.txt @@ -0,0 +1,54 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_archiver_bin + TOOL_archiver_dependency + tools/archiver + archiver +) + +add_library(kernel-hosts-owner) +target_include_directories(kernel-hosts-owner PUBLIC + ${CMAKE_BINARY_DIR}/kernel/hosts/owner +) +target_link_libraries(kernel-hosts-owner PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-archive + cpp-containers-str_hash + cpp-string_utils-url +) +target_sources(kernel-hosts-owner PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/hosts/owner/owner.cpp + ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc +) +add_custom_command( + OUTPUT + ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc + DEPENDS + ${TOOL_archiver_bin} + ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst + ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list + ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list + COMMAND + ${TOOL_archiver_bin} + -q + -x + ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst: + ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list: + ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list: + -o + ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc +) +if(NOT CMAKE_CROSSCOMPILING) + add_dependencies(kernel-hosts-owner + archiver +) +endif() diff --git a/kernel/hosts/owner/CMakeLists.linux-x86_64.txt b/kernel/hosts/owner/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..7421b7739c --- /dev/null +++ b/kernel/hosts/owner/CMakeLists.linux-x86_64.txt @@ -0,0 +1,54 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_archiver_bin + TOOL_archiver_dependency + tools/archiver + archiver +) + +add_library(kernel-hosts-owner) +target_include_directories(kernel-hosts-owner PUBLIC + ${CMAKE_BINARY_DIR}/kernel/hosts/owner +) +target_link_libraries(kernel-hosts-owner PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-archive + cpp-containers-str_hash + cpp-string_utils-url +) +target_sources(kernel-hosts-owner PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/hosts/owner/owner.cpp + ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc +) +add_custom_command( + OUTPUT + ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc + DEPENDS + ${TOOL_archiver_bin} + ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst + ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list + ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list + COMMAND + ${TOOL_archiver_bin} + -q + -x + ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst: + ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list: + ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list: + -o + ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc +) +if(NOT CMAKE_CROSSCOMPILING) + add_dependencies(kernel-hosts-owner + archiver +) +endif() diff --git a/kernel/hosts/owner/CMakeLists.txt b/kernel/hosts/owner/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/kernel/hosts/owner/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/kernel/hosts/owner/CMakeLists.windows-x86_64.txt b/kernel/hosts/owner/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..cb414f86b7 --- /dev/null +++ b/kernel/hosts/owner/CMakeLists.windows-x86_64.txt @@ -0,0 +1,53 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_archiver_bin + TOOL_archiver_dependency + tools/archiver + archiver +) + +add_library(kernel-hosts-owner) +target_include_directories(kernel-hosts-owner PUBLIC + ${CMAKE_BINARY_DIR}/kernel/hosts/owner +) +target_link_libraries(kernel-hosts-owner PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-archive + cpp-containers-str_hash + cpp-string_utils-url +) +target_sources(kernel-hosts-owner PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/hosts/owner/owner.cpp + ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc +) +add_custom_command( + OUTPUT + ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc + DEPENDS + ${TOOL_archiver_bin} + ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst + ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list + ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list + COMMAND + ${TOOL_archiver_bin} + -q + -x + ${CMAKE_SOURCE_DIR}/yweb/urlrules/areas.lst: + ${CMAKE_SOURCE_DIR}/yweb/urlrules/2ld.list: + ${CMAKE_SOURCE_DIR}/yweb/urlrules/ungrouped.list: + -o + ${CMAKE_BINARY_DIR}/kernel/hosts/owner/urlrules.inc +) +if(NOT CMAKE_CROSSCOMPILING) + add_dependencies(kernel-hosts-owner + archiver +) +endif() diff --git a/kernel/indexann/CMakeLists.txt b/kernel/indexann/CMakeLists.txt new file mode 100644 index 0000000000..6d580ae9ad --- /dev/null +++ b/kernel/indexann/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(protos) diff --git a/kernel/indexann/protos/CMakeLists.darwin-x86_64.txt b/kernel/indexann/protos/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..b9a657cc4a --- /dev/null +++ b/kernel/indexann/protos/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-indexann-protos) +target_link_libraries(kernel-indexann-protos PUBLIC + contrib-libs-cxxsupp + yutil + yt-interface-protos + contrib-libs-protobuf +) +target_proto_messages(kernel-indexann-protos PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/data.proto + ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/portion.proto +) +target_proto_addincls(kernel-indexann-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-indexann-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/kernel/indexann/protos/CMakeLists.linux-aarch64.txt 
b/kernel/indexann/protos/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..2a0f142e64 --- /dev/null +++ b/kernel/indexann/protos/CMakeLists.linux-aarch64.txt @@ -0,0 +1,58 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-indexann-protos) +target_link_libraries(kernel-indexann-protos PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yt-interface-protos + contrib-libs-protobuf +) +target_proto_messages(kernel-indexann-protos PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/data.proto + ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/portion.proto +) +target_proto_addincls(kernel-indexann-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-indexann-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/kernel/indexann/protos/CMakeLists.linux-x86_64.txt b/kernel/indexann/protos/CMakeLists.linux-x86_64.txt new file mode 100644 index 
0000000000..2a0f142e64 --- /dev/null +++ b/kernel/indexann/protos/CMakeLists.linux-x86_64.txt @@ -0,0 +1,58 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-indexann-protos) +target_link_libraries(kernel-indexann-protos PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yt-interface-protos + contrib-libs-protobuf +) +target_proto_messages(kernel-indexann-protos PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/data.proto + ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/portion.proto +) +target_proto_addincls(kernel-indexann-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-indexann-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/kernel/indexann/protos/CMakeLists.txt b/kernel/indexann/protos/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/kernel/indexann/protos/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was 
generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/kernel/indexann/protos/CMakeLists.windows-x86_64.txt b/kernel/indexann/protos/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..b9a657cc4a --- /dev/null +++ b/kernel/indexann/protos/CMakeLists.windows-x86_64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-indexann-protos) +target_link_libraries(kernel-indexann-protos PUBLIC + contrib-libs-cxxsupp + yutil + yt-interface-protos + contrib-libs-protobuf +) +target_proto_messages(kernel-indexann-protos PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/data.proto + ${CMAKE_SOURCE_DIR}/kernel/indexann/protos/portion.proto +) +target_proto_addincls(kernel-indexann-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-indexann-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/kernel/langregion/CMakeLists.darwin-x86_64.txt b/kernel/langregion/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..610427f4c4 --- /dev/null +++ b/kernel/langregion/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(kernel-langregion) +target_link_libraries(kernel-langregion PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-charset +) +target_sources(kernel-langregion PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/langregion/langregion.cpp +) diff --git a/kernel/langregion/CMakeLists.linux-aarch64.txt b/kernel/langregion/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..513f55fda2 --- /dev/null +++ b/kernel/langregion/CMakeLists.linux-aarch64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(kernel-langregion) +target_link_libraries(kernel-langregion PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-charset +) +target_sources(kernel-langregion PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/langregion/langregion.cpp +) diff --git a/kernel/langregion/CMakeLists.linux-x86_64.txt b/kernel/langregion/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..513f55fda2 --- /dev/null +++ b/kernel/langregion/CMakeLists.linux-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(kernel-langregion) +target_link_libraries(kernel-langregion PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-charset +) +target_sources(kernel-langregion PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/langregion/langregion.cpp +) diff --git a/kernel/langregion/CMakeLists.txt b/kernel/langregion/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/kernel/langregion/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/kernel/langregion/CMakeLists.windows-x86_64.txt b/kernel/langregion/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..610427f4c4 --- /dev/null +++ b/kernel/langregion/CMakeLists.windows-x86_64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. 
Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(kernel-langregion) +target_link_libraries(kernel-langregion PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-charset +) +target_sources(kernel-langregion PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/langregion/langregion.cpp +) diff --git a/kernel/mango/CMakeLists.txt b/kernel/mango/CMakeLists.txt new file mode 100644 index 0000000000..499930c4b0 --- /dev/null +++ b/kernel/mango/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(proto) diff --git a/kernel/mango/proto/CMakeLists.darwin-x86_64.txt b/kernel/mango/proto/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..84532c4d8d --- /dev/null +++ b/kernel/mango/proto/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,176 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + 
cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-mango-proto) +target_link_libraries(kernel-mango-proto PUBLIC + contrib-libs-cxxsupp + yutil + kernel-blogs-protos + kernel-indexann-protos + cpp-langmask-proto + contrib-libs-protobuf +) +target_proto_messages(kernel-mango-proto PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/author.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/authority.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/common.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/content.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/dl.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/fresh_feeds.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/ofeed.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/biased.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/quotes.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/trees.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/statistics.proto +) +target_proto_addincls(kernel-mango-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-mango-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git 
a/kernel/mango/proto/CMakeLists.linux-aarch64.txt b/kernel/mango/proto/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..4842433605 --- /dev/null +++ b/kernel/mango/proto/CMakeLists.linux-aarch64.txt @@ -0,0 +1,177 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + 
protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-mango-proto) +target_link_libraries(kernel-mango-proto PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + kernel-blogs-protos + kernel-indexann-protos + cpp-langmask-proto + contrib-libs-protobuf +) +target_proto_messages(kernel-mango-proto PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/author.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/authority.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/common.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/content.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/dl.proto + 
${CMAKE_SOURCE_DIR}/kernel/mango/proto/fresh_feeds.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/ofeed.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/biased.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/quotes.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/trees.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/statistics.proto +) +target_proto_addincls(kernel-mango-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-mango-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/kernel/mango/proto/CMakeLists.linux-x86_64.txt b/kernel/mango/proto/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..4842433605 --- /dev/null +++ b/kernel/mango/proto/CMakeLists.linux-x86_64.txt @@ -0,0 +1,177 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + 
cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-mango-proto) +target_link_libraries(kernel-mango-proto PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + kernel-blogs-protos + kernel-indexann-protos + cpp-langmask-proto + contrib-libs-protobuf +) +target_proto_messages(kernel-mango-proto PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/author.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/authority.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/common.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/content.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/dl.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/fresh_feeds.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/ofeed.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/biased.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/quotes.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/trees.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/statistics.proto +) +target_proto_addincls(kernel-mango-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-mango-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + 
--cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/kernel/mango/proto/CMakeLists.txt b/kernel/mango/proto/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/kernel/mango/proto/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/kernel/mango/proto/CMakeLists.windows-x86_64.txt b/kernel/mango/proto/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..84532c4d8d --- /dev/null +++ b/kernel/mango/proto/CMakeLists.windows-x86_64.txt @@ -0,0 +1,176 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + 
cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-mango-proto) +target_link_libraries(kernel-mango-proto PUBLIC + contrib-libs-cxxsupp + yutil + kernel-blogs-protos + kernel-indexann-protos + cpp-langmask-proto + contrib-libs-protobuf +) +target_proto_messages(kernel-mango-proto PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/author.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/authority.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/common.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/content.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/dl.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/fresh_feeds.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/ofeed.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/biased.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/quotes.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/trees.proto + ${CMAKE_SOURCE_DIR}/kernel/mango/proto/statistics.proto +) +target_proto_addincls(kernel-mango-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-mango-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git 
a/kernel/multilanguage_hosts/CMakeLists.darwin-x86_64.txt b/kernel/multilanguage_hosts/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..ca8f3d8057 --- /dev/null +++ b/kernel/multilanguage_hosts/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(kernel-multilanguage_hosts) +target_link_libraries(kernel-multilanguage_hosts PUBLIC + contrib-libs-cxxsupp + yutil + kernel-langregion + cpp-string_utils-url +) +target_sources(kernel-multilanguage_hosts PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilanguage_hosts.cpp + ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilang_prefixes.cpp +) diff --git a/kernel/multilanguage_hosts/CMakeLists.linux-aarch64.txt b/kernel/multilanguage_hosts/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..b64c151d91 --- /dev/null +++ b/kernel/multilanguage_hosts/CMakeLists.linux-aarch64.txt @@ -0,0 +1,21 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(kernel-multilanguage_hosts) +target_link_libraries(kernel-multilanguage_hosts PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + kernel-langregion + cpp-string_utils-url +) +target_sources(kernel-multilanguage_hosts PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilanguage_hosts.cpp + ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilang_prefixes.cpp +) diff --git a/kernel/multilanguage_hosts/CMakeLists.linux-x86_64.txt b/kernel/multilanguage_hosts/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..b64c151d91 --- /dev/null +++ b/kernel/multilanguage_hosts/CMakeLists.linux-x86_64.txt @@ -0,0 +1,21 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(kernel-multilanguage_hosts) +target_link_libraries(kernel-multilanguage_hosts PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + kernel-langregion + cpp-string_utils-url +) +target_sources(kernel-multilanguage_hosts PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilanguage_hosts.cpp + ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilang_prefixes.cpp +) diff --git a/kernel/multilanguage_hosts/CMakeLists.txt b/kernel/multilanguage_hosts/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/kernel/multilanguage_hosts/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/kernel/multilanguage_hosts/CMakeLists.windows-x86_64.txt b/kernel/multilanguage_hosts/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..ca8f3d8057 --- /dev/null +++ b/kernel/multilanguage_hosts/CMakeLists.windows-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(kernel-multilanguage_hosts) +target_link_libraries(kernel-multilanguage_hosts PUBLIC + contrib-libs-cxxsupp + yutil + kernel-langregion + cpp-string_utils-url +) +target_sources(kernel-multilanguage_hosts PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilanguage_hosts.cpp + ${CMAKE_SOURCE_DIR}/kernel/multilanguage_hosts/multilang_prefixes.cpp +) diff --git a/kernel/search_zone/CMakeLists.txt b/kernel/search_zone/CMakeLists.txt new file mode 100644 index 0000000000..6d580ae9ad --- /dev/null +++ b/kernel/search_zone/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(protos) diff --git a/kernel/search_zone/protos/CMakeLists.darwin-x86_64.txt b/kernel/search_zone/protos/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..632f507298 --- /dev/null +++ b/kernel/search_zone/protos/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,45 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-search_zone-protos) +target_link_libraries(kernel-search_zone-protos PUBLIC + contrib-libs-cxxsupp + yutil + yt-interface-protos + yt_proto-yt-formats + contrib-libs-protobuf +) +target_proto_messages(kernel-search_zone-protos PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/search_zone/protos/searchzone.proto +) +target_proto_addincls(kernel-search_zone-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-search_zone-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/kernel/search_zone/protos/CMakeLists.linux-aarch64.txt b/kernel/search_zone/protos/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..25049b81ee --- /dev/null +++ b/kernel/search_zone/protos/CMakeLists.linux-aarch64.txt @@ -0,0 +1,46 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-search_zone-protos) +target_link_libraries(kernel-search_zone-protos PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yt-interface-protos + yt_proto-yt-formats + contrib-libs-protobuf +) +target_proto_messages(kernel-search_zone-protos PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/search_zone/protos/searchzone.proto +) +target_proto_addincls(kernel-search_zone-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-search_zone-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/kernel/search_zone/protos/CMakeLists.linux-x86_64.txt b/kernel/search_zone/protos/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..25049b81ee --- /dev/null +++ b/kernel/search_zone/protos/CMakeLists.linux-x86_64.txt @@ -0,0 +1,46 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-search_zone-protos) +target_link_libraries(kernel-search_zone-protos PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yt-interface-protos + yt_proto-yt-formats + contrib-libs-protobuf +) +target_proto_messages(kernel-search_zone-protos PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/search_zone/protos/searchzone.proto +) +target_proto_addincls(kernel-search_zone-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-search_zone-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/kernel/search_zone/protos/CMakeLists.txt b/kernel/search_zone/protos/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/kernel/search_zone/protos/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/kernel/search_zone/protos/CMakeLists.windows-x86_64.txt b/kernel/search_zone/protos/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..632f507298 --- /dev/null +++ b/kernel/search_zone/protos/CMakeLists.windows-x86_64.txt @@ -0,0 +1,45 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(kernel-search_zone-protos) +target_link_libraries(kernel-search_zone-protos PUBLIC + contrib-libs-cxxsupp + yutil + yt-interface-protos + yt_proto-yt-formats + contrib-libs-protobuf +) +target_proto_messages(kernel-search_zone-protos PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/search_zone/protos/searchzone.proto +) +target_proto_addincls(kernel-search_zone-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(kernel-search_zone-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/kernel/urlnorm/CMakeLists.darwin-x86_64.txt b/kernel/urlnorm/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..e3a91ac4ad --- /dev/null +++ b/kernel/urlnorm/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,39 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_ragel5_bin + TOOL_ragel5_dependency + contrib/tools/ragel5/ragel + ragel5 +) +get_built_tool_path( + TOOL_rlgen-cd_bin + TOOL_rlgen-cd_dependency + contrib/tools/ragel5/rlgen-cd + rlgen-cd +) + +add_library(kernel-urlnorm) +target_link_libraries(kernel-urlnorm PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cgiparam + cpp-digest-md5 + cpp-string_utils-base64 + cpp-string_utils-quote + cpp-string_utils-url + library-cpp-uri +) +target_sources(kernel-urlnorm PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/host.cpp + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/normalize.cpp + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/urlnorm.cpp + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/validate.cpp + ${CMAKE_BINARY_DIR}/kernel/urlnorm/urlhashval.rl5.cpp +) diff --git a/kernel/urlnorm/CMakeLists.linux-aarch64.txt b/kernel/urlnorm/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..d365d758ea --- /dev/null +++ b/kernel/urlnorm/CMakeLists.linux-aarch64.txt @@ -0,0 +1,40 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_ragel5_bin + TOOL_ragel5_dependency + contrib/tools/ragel5/ragel + ragel5 +) +get_built_tool_path( + TOOL_rlgen-cd_bin + TOOL_rlgen-cd_dependency + contrib/tools/ragel5/rlgen-cd + rlgen-cd +) + +add_library(kernel-urlnorm) +target_link_libraries(kernel-urlnorm PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-cgiparam + cpp-digest-md5 + cpp-string_utils-base64 + cpp-string_utils-quote + cpp-string_utils-url + library-cpp-uri +) +target_sources(kernel-urlnorm PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/host.cpp + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/normalize.cpp + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/urlnorm.cpp + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/validate.cpp + ${CMAKE_BINARY_DIR}/kernel/urlnorm/urlhashval.rl5.cpp +) diff --git a/kernel/urlnorm/CMakeLists.linux-x86_64.txt b/kernel/urlnorm/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..d365d758ea --- /dev/null +++ b/kernel/urlnorm/CMakeLists.linux-x86_64.txt @@ -0,0 +1,40 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_ragel5_bin + TOOL_ragel5_dependency + contrib/tools/ragel5/ragel + ragel5 +) +get_built_tool_path( + TOOL_rlgen-cd_bin + TOOL_rlgen-cd_dependency + contrib/tools/ragel5/rlgen-cd + rlgen-cd +) + +add_library(kernel-urlnorm) +target_link_libraries(kernel-urlnorm PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-cgiparam + cpp-digest-md5 + cpp-string_utils-base64 + cpp-string_utils-quote + cpp-string_utils-url + library-cpp-uri +) +target_sources(kernel-urlnorm PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/host.cpp + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/normalize.cpp + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/urlnorm.cpp + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/validate.cpp + ${CMAKE_BINARY_DIR}/kernel/urlnorm/urlhashval.rl5.cpp +) diff --git a/kernel/urlnorm/CMakeLists.txt b/kernel/urlnorm/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/kernel/urlnorm/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/kernel/urlnorm/CMakeLists.windows-x86_64.txt b/kernel/urlnorm/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..e3a91ac4ad --- /dev/null +++ b/kernel/urlnorm/CMakeLists.windows-x86_64.txt @@ -0,0 +1,39 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_ragel5_bin + TOOL_ragel5_dependency + contrib/tools/ragel5/ragel + ragel5 +) +get_built_tool_path( + TOOL_rlgen-cd_bin + TOOL_rlgen-cd_dependency + contrib/tools/ragel5/rlgen-cd + rlgen-cd +) + +add_library(kernel-urlnorm) +target_link_libraries(kernel-urlnorm PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cgiparam + cpp-digest-md5 + cpp-string_utils-base64 + cpp-string_utils-quote + cpp-string_utils-url + library-cpp-uri +) +target_sources(kernel-urlnorm PRIVATE + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/host.cpp + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/normalize.cpp + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/urlnorm.cpp + ${CMAKE_SOURCE_DIR}/kernel/urlnorm/validate.cpp + ${CMAKE_BINARY_DIR}/kernel/urlnorm/urlhashval.rl5.cpp +) diff --git a/library/cpp/CMakeLists.darwin-x86_64.txt b/library/cpp/CMakeLists.darwin-x86_64.txt index 772027a342..5497fd21be 100644 --- a/library/cpp/CMakeLists.darwin-x86_64.txt +++ b/library/cpp/CMakeLists.darwin-x86_64.txt @@ -36,6 +36,9 @@ add_subdirectory(disjoint_sets) add_subdirectory(dns) add_subdirectory(enumbitset) add_subdirectory(execprofile) +add_subdirectory(geo) +add_subdirectory(geobase) +add_subdirectory(geohash) add_subdirectory(getopt) add_subdirectory(grpc) add_subdirectory(histogram) @@ -44,9 +47,11 @@ add_subdirectory(http) add_subdirectory(hyperloglog) add_subdirectory(int128) add_subdirectory(ipmath) +add_subdirectory(ipreg) add_subdirectory(ipv6_address) add_subdirectory(iterator) add_subdirectory(json) +add_subdirectory(langmask) add_subdirectory(lcs) add_subdirectory(lfalloc) add_subdirectory(linear_regression) @@ -55,6 +60,7 @@ add_subdirectory(lua) add_subdirectory(lwtrace) add_subdirectory(malloc) add_subdirectory(messagebus) +add_subdirectory(microbdb) add_subdirectory(mime) add_subdirectory(monlib) add_subdirectory(on_disk) @@ -68,6 +74,8 @@ add_subdirectory(random_provider) add_subdirectory(regex) add_subdirectory(resource) add_subdirectory(retry) +add_subdirectory(reverse_geocoder) 
+add_subdirectory(robots_txt) add_subdirectory(sanitizer) add_subdirectory(scheme) add_subdirectory(sighandler) @@ -90,6 +98,7 @@ add_subdirectory(unified_agent_client) add_subdirectory(uri) add_subdirectory(xml) add_subdirectory(yaml) +add_subdirectory(yconf) add_subdirectory(yson) add_subdirectory(yson_pull) add_subdirectory(yt) diff --git a/library/cpp/CMakeLists.linux-aarch64.txt b/library/cpp/CMakeLists.linux-aarch64.txt index cd50b0e3a4..5e93629802 100644 --- a/library/cpp/CMakeLists.linux-aarch64.txt +++ b/library/cpp/CMakeLists.linux-aarch64.txt @@ -35,6 +35,9 @@ add_subdirectory(disjoint_sets) add_subdirectory(dns) add_subdirectory(enumbitset) add_subdirectory(execprofile) +add_subdirectory(geo) +add_subdirectory(geobase) +add_subdirectory(geohash) add_subdirectory(getopt) add_subdirectory(grpc) add_subdirectory(histogram) @@ -43,9 +46,11 @@ add_subdirectory(http) add_subdirectory(hyperloglog) add_subdirectory(int128) add_subdirectory(ipmath) +add_subdirectory(ipreg) add_subdirectory(ipv6_address) add_subdirectory(iterator) add_subdirectory(json) +add_subdirectory(langmask) add_subdirectory(lcs) add_subdirectory(lfalloc) add_subdirectory(linear_regression) @@ -54,6 +59,7 @@ add_subdirectory(lua) add_subdirectory(lwtrace) add_subdirectory(malloc) add_subdirectory(messagebus) +add_subdirectory(microbdb) add_subdirectory(mime) add_subdirectory(monlib) add_subdirectory(on_disk) @@ -67,6 +73,8 @@ add_subdirectory(random_provider) add_subdirectory(regex) add_subdirectory(resource) add_subdirectory(retry) +add_subdirectory(reverse_geocoder) +add_subdirectory(robots_txt) add_subdirectory(sanitizer) add_subdirectory(scheme) add_subdirectory(sighandler) @@ -89,6 +97,7 @@ add_subdirectory(unified_agent_client) add_subdirectory(uri) add_subdirectory(xml) add_subdirectory(yaml) +add_subdirectory(yconf) add_subdirectory(yson) add_subdirectory(yson_pull) add_subdirectory(yt) diff --git a/library/cpp/CMakeLists.linux-x86_64.txt b/library/cpp/CMakeLists.linux-x86_64.txt 
index 772027a342..5497fd21be 100644 --- a/library/cpp/CMakeLists.linux-x86_64.txt +++ b/library/cpp/CMakeLists.linux-x86_64.txt @@ -36,6 +36,9 @@ add_subdirectory(disjoint_sets) add_subdirectory(dns) add_subdirectory(enumbitset) add_subdirectory(execprofile) +add_subdirectory(geo) +add_subdirectory(geobase) +add_subdirectory(geohash) add_subdirectory(getopt) add_subdirectory(grpc) add_subdirectory(histogram) @@ -44,9 +47,11 @@ add_subdirectory(http) add_subdirectory(hyperloglog) add_subdirectory(int128) add_subdirectory(ipmath) +add_subdirectory(ipreg) add_subdirectory(ipv6_address) add_subdirectory(iterator) add_subdirectory(json) +add_subdirectory(langmask) add_subdirectory(lcs) add_subdirectory(lfalloc) add_subdirectory(linear_regression) @@ -55,6 +60,7 @@ add_subdirectory(lua) add_subdirectory(lwtrace) add_subdirectory(malloc) add_subdirectory(messagebus) +add_subdirectory(microbdb) add_subdirectory(mime) add_subdirectory(monlib) add_subdirectory(on_disk) @@ -68,6 +74,8 @@ add_subdirectory(random_provider) add_subdirectory(regex) add_subdirectory(resource) add_subdirectory(retry) +add_subdirectory(reverse_geocoder) +add_subdirectory(robots_txt) add_subdirectory(sanitizer) add_subdirectory(scheme) add_subdirectory(sighandler) @@ -90,6 +98,7 @@ add_subdirectory(unified_agent_client) add_subdirectory(uri) add_subdirectory(xml) add_subdirectory(yaml) +add_subdirectory(yconf) add_subdirectory(yson) add_subdirectory(yson_pull) add_subdirectory(yt) diff --git a/library/cpp/CMakeLists.windows-x86_64.txt b/library/cpp/CMakeLists.windows-x86_64.txt index 772027a342..5497fd21be 100644 --- a/library/cpp/CMakeLists.windows-x86_64.txt +++ b/library/cpp/CMakeLists.windows-x86_64.txt @@ -36,6 +36,9 @@ add_subdirectory(disjoint_sets) add_subdirectory(dns) add_subdirectory(enumbitset) add_subdirectory(execprofile) +add_subdirectory(geo) +add_subdirectory(geobase) +add_subdirectory(geohash) add_subdirectory(getopt) add_subdirectory(grpc) add_subdirectory(histogram) @@ -44,9 
+47,11 @@ add_subdirectory(http) add_subdirectory(hyperloglog) add_subdirectory(int128) add_subdirectory(ipmath) +add_subdirectory(ipreg) add_subdirectory(ipv6_address) add_subdirectory(iterator) add_subdirectory(json) +add_subdirectory(langmask) add_subdirectory(lcs) add_subdirectory(lfalloc) add_subdirectory(linear_regression) @@ -55,6 +60,7 @@ add_subdirectory(lua) add_subdirectory(lwtrace) add_subdirectory(malloc) add_subdirectory(messagebus) +add_subdirectory(microbdb) add_subdirectory(mime) add_subdirectory(monlib) add_subdirectory(on_disk) @@ -68,6 +74,8 @@ add_subdirectory(random_provider) add_subdirectory(regex) add_subdirectory(resource) add_subdirectory(retry) +add_subdirectory(reverse_geocoder) +add_subdirectory(robots_txt) add_subdirectory(sanitizer) add_subdirectory(scheme) add_subdirectory(sighandler) @@ -90,6 +98,7 @@ add_subdirectory(unified_agent_client) add_subdirectory(uri) add_subdirectory(xml) add_subdirectory(yaml) +add_subdirectory(yconf) add_subdirectory(yson) add_subdirectory(yson_pull) add_subdirectory(yt) diff --git a/library/cpp/containers/CMakeLists.txt b/library/cpp/containers/CMakeLists.txt index 43fcbe8346..40f5013867 100644 --- a/library/cpp/containers/CMakeLists.txt +++ b/library/cpp/containers/CMakeLists.txt @@ -20,5 +20,6 @@ add_subdirectory(ring_buffer) add_subdirectory(sorted_vector) add_subdirectory(stack_array) add_subdirectory(stack_vector) +add_subdirectory(str_hash) add_subdirectory(str_map) add_subdirectory(top_keeper) diff --git a/library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt b/library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..627814f0ed --- /dev/null +++ b/library/cpp/containers/str_hash/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-containers-str_hash) +target_link_libraries(cpp-containers-str_hash PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-charset + cpp-containers-str_map +) +target_sources(cpp-containers-str_hash PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp +) diff --git a/library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt b/library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..cd723cbea2 --- /dev/null +++ b/library/cpp/containers/str_hash/CMakeLists.linux-aarch64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-containers-str_hash) +target_link_libraries(cpp-containers-str_hash PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-charset + cpp-containers-str_map +) +target_sources(cpp-containers-str_hash PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp +) diff --git a/library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt b/library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..cd723cbea2 --- /dev/null +++ b/library/cpp/containers/str_hash/CMakeLists.linux-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-containers-str_hash) +target_link_libraries(cpp-containers-str_hash PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-charset + cpp-containers-str_map +) +target_sources(cpp-containers-str_hash PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp +) diff --git a/library/cpp/containers/str_hash/CMakeLists.txt b/library/cpp/containers/str_hash/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/containers/str_hash/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt b/library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..627814f0ed --- /dev/null +++ b/library/cpp/containers/str_hash/CMakeLists.windows-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-containers-str_hash) +target_link_libraries(cpp-containers-str_hash PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-charset + cpp-containers-str_map +) +target_sources(cpp-containers-str_hash PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/containers/str_hash/str_hash.cpp +) diff --git a/library/cpp/containers/str_hash/str_hash.cpp b/library/cpp/containers/str_hash/str_hash.cpp new file mode 100644 index 0000000000..1298638533 --- /dev/null +++ b/library/cpp/containers/str_hash/str_hash.cpp @@ -0,0 +1,60 @@ +#include "str_hash.h" + +#include <library/cpp/charset/ci_string.h> +#include <util/stream/output.h> +#include <util/stream/input.h> + +HashSet::HashSet(const char** array, size_type size) { + Resize(size); + while (*array && **array) + AddPermanent(*array++); +} + +void HashSet::Read(IInputStream* input) { + TString s; + + while (input->ReadLine(s)) { + AddUniq(TCiString(s).c_str()); + } +} + +void HashSet::Write(IOutputStream* output) const { + for (const auto& it : *this) { + *output << it.first << "\n"; + } +} + +#ifdef TEST_STRHASH +#include <ctime> +#include <fstream> +#include <cstdio> +#include <cstdlib> + +using namespace std; + +int main(int argc, char* argv[]) { + if (argc < 2) { + printf("usage: stoplist <stop-words file ...\n"); + exit(EXIT_FAILURE); // FreeBSD: EX_USAGE + } + Hash hash; + hash.Read(cin); + for (--argc, ++argv; argc > 0; --argc, ++argv) { + ifstream input(argv[0]); + if (!input.good()) { + perror(argv[0]); + continue; + } + TCiString s; + while (input >> s) { + if (!hash.Has(s)) + cout << s << "\n"; + else + cout << "[[" << s << "]]" + << "\n"; + } + } + return EXIT_SUCCESS; // EX_OK +} + +#endif diff --git a/library/cpp/containers/str_hash/str_hash.h b/library/cpp/containers/str_hash/str_hash.h new file mode 100644 index 0000000000..25f960dbb5 --- /dev/null +++ b/library/cpp/containers/str_hash/str_hash.h @@ -0,0 +1,181 @@ +#pragma once + +#include <library/cpp/containers/str_map/str_map.h> +#include 
<library/cpp/charset/ci_string.h> +#include <util/system/yassert.h> +#include <util/memory/tempbuf.h> + +#include <memory> + +class IInputStream; +class IOutputStream; + +template <class T, class Alloc = std::allocator<const char*>> +class Hash; + +struct yvoid { + yvoid() = default; +}; + +template <typename T, class Alloc> +class Hash: public string_hash<T, ci_hash, ci_equal_to, Alloc> { + using ci_string_hash = string_hash<T, ci_hash, ci_equal_to, Alloc>; + +protected: + using ci_string_hash::pool; + +public: + using size_type = typename ci_string_hash::size_type; + using const_iterator = typename ci_string_hash::const_iterator; + using iterator = typename ci_string_hash::iterator; + using value_type = typename ci_string_hash::value_type; + using ci_string_hash::begin; + using ci_string_hash::end; + using ci_string_hash::find; + using ci_string_hash::size; + + Hash() + : ci_string_hash() + { + } + explicit Hash(size_type theSize) + : ci_string_hash(theSize, theSize * AVERAGEWORD_BUF) + { + } + Hash(const char** strings, size_type size = 0, T* = 0); // must end with NULL or "\0" + virtual ~Hash(); + bool Has(const char* s, size_t len, T* pp = nullptr) const; + bool Has(const char* s, T* pp = nullptr) const { + const_iterator it; + if ((it = find(s)) == end()) + return false; + else if (pp) + *pp = (*it).second; + return true; + } + void Add(const char* s, T data) { + // in fact it is the same insert_unique as in AddUnique. + // it's impossible to have _FAST_ version of insert() in 'hash_map' + + // you have to use 'hash_mmap' to get the _kind_ of desired effect. + // BUT still there will be "Checks" inside - + // to make the same keys close to each other (see insert_equal()) + this->insert_copy(s, data); + } + bool AddUniq(const char* s, T data) { + return this->insert_copy(s, data).second; + } + // new function to get rid of allocations completely! -- e.g. 
in constructors + void AddPermanent(const char* s, T data) { + this->insert(value_type(s, data)); + } + T Detach(const char* s) { + iterator it = find(s); + if (it == end()) + return T(); + T data = (*it).second; + this->erase(it); + return data; + } + size_type NumEntries() const { + return size(); + } + bool ForEach(bool (*func)(const char* key, T data, void* cookie), void* cookie = nullptr); + void Resize(size_type theSize) { + this->reserve(theSize); + // no pool resizing here. + } + virtual void Clear(); + char* Pool() { + if (pool.Size() < 2 || pool.End()[-2] != '\0') + pool.Append("\0", 1); + return pool.Begin(); + } +}; + +template <class T, class Alloc> +Hash<T, Alloc>::Hash(const char** array, size_type theSize, T* data) { + // must end with NULL or "\0" + Y_ASSERT(data != nullptr); + Resize(theSize); + while (*array && **array) + AddPermanent(*array++, *data++); +} + +template <class T, class Alloc> +bool Hash<T, Alloc>::Has(const char* s, size_t len, T* pp) const { + TTempArray<char> buf(len + 1); + char* const allocated = buf.Data(); + memcpy(allocated, s, len); + allocated[len] = '\x00'; + return Has(allocated, pp); +} + +template <class T, class Alloc> +Hash<T, Alloc>::~Hash() { + Clear(); +} + +template <class T, class Alloc> +void Hash<T, Alloc>::Clear() { + ci_string_hash::clear_hash(); // to make the key pool empty +} + +template <class T, class Alloc> +bool Hash<T, Alloc>::ForEach(bool (*func)(const char* key, T data, void* cookie), void* cookie) { + for (const_iterator it = begin(); it != end(); ++it) + if (!func((*it).first, (*it).second, cookie)) + return false; + return true; +} + +class HashSet: public Hash<yvoid> { +public: + HashSet(const char** array, size_type size = 0); + HashSet() + : Hash<yvoid>() + { + } + void Read(IInputStream* input); + void Write(IOutputStream* output) const; + void Add(const char* s) { + // in fact it is the same insert_unique as in AddUnique. 
+ // it's impossible to have _FAST_ version of insert() in 'hash_map' + + // you have to use 'hash_mmap' to get the _kind_ of desired effect. + // BUT still there will be "Checks" inside - + // to make the same keys close to each other (see insert_equal()) + insert_copy(s, yvoid()); + } + bool AddUniq(const char* s) { + return insert_copy(s, yvoid()).second; + } + // new function to get rid of allocations completely! -- e.g. in constructors + void AddPermanent(const char* s) { + insert(value_type(s, yvoid())); + } +}; + +template <class T, class HashFcn = THash<T>, class EqualKey = TEqualTo<T>, class Alloc = std::allocator<T>> +class TStaticHash: private THashMap<T, T, HashFcn, EqualKey> { +private: + using TBase = THashMap<T, T, HashFcn, EqualKey>; + +public: + TStaticHash(T arr[][2], size_t size) { + TBase::reserve(size); + while (size) { + TBase::insert(typename TBase::value_type(arr[0][0], arr[0][1])); + arr++; + size--; + } + } + T operator[](const T& key) const { // !!! it is not lvalue nor it used to be + typename TBase::const_iterator it = TBase::find(key); + if (it == TBase::end()) + return nullptr; + return it->second; + } +}; + +using TStHash = TStaticHash<const char*, ci_hash, ci_equal_to>; diff --git a/library/cpp/containers/str_hash/ya.make b/library/cpp/containers/str_hash/ya.make new file mode 100644 index 0000000000..f7e24316b9 --- /dev/null +++ b/library/cpp/containers/str_hash/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +PEERDIR( + library/cpp/charset + library/cpp/containers/str_map +) + +SRCS( + str_hash.cpp +) + +END() diff --git a/library/cpp/deprecated/CMakeLists.txt b/library/cpp/deprecated/CMakeLists.txt index ad818e3662..765ea6aad7 100644 --- a/library/cpp/deprecated/CMakeLists.txt +++ b/library/cpp/deprecated/CMakeLists.txt @@ -8,6 +8,10 @@ add_subdirectory(accessors) add_subdirectory(atomic) +add_subdirectory(autoarray) +add_subdirectory(datafile) add_subdirectory(enum_codegen) +add_subdirectory(fgood) add_subdirectory(kmp) 
+add_subdirectory(mapped_file) add_subdirectory(split) diff --git a/library/cpp/deprecated/autoarray/CMakeLists.darwin-x86_64.txt b/library/cpp/deprecated/autoarray/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..f2a246218c --- /dev/null +++ b/library/cpp/deprecated/autoarray/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-deprecated-autoarray) +target_link_libraries(cpp-deprecated-autoarray PUBLIC + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-deprecated-autoarray PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/autoarray/autoarray.cpp +) diff --git a/library/cpp/deprecated/autoarray/CMakeLists.linux-aarch64.txt b/library/cpp/deprecated/autoarray/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..2411a48cd3 --- /dev/null +++ b/library/cpp/deprecated/autoarray/CMakeLists.linux-aarch64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-deprecated-autoarray) +target_link_libraries(cpp-deprecated-autoarray PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-deprecated-autoarray PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/autoarray/autoarray.cpp +) diff --git a/library/cpp/deprecated/autoarray/CMakeLists.linux-x86_64.txt b/library/cpp/deprecated/autoarray/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..2411a48cd3 --- /dev/null +++ b/library/cpp/deprecated/autoarray/CMakeLists.linux-x86_64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-deprecated-autoarray) +target_link_libraries(cpp-deprecated-autoarray PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-deprecated-autoarray PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/autoarray/autoarray.cpp +) diff --git a/library/cpp/deprecated/autoarray/CMakeLists.txt b/library/cpp/deprecated/autoarray/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/deprecated/autoarray/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/deprecated/autoarray/CMakeLists.windows-x86_64.txt b/library/cpp/deprecated/autoarray/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..f2a246218c --- /dev/null +++ b/library/cpp/deprecated/autoarray/CMakeLists.windows-x86_64.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-deprecated-autoarray) +target_link_libraries(cpp-deprecated-autoarray PUBLIC + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-deprecated-autoarray PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/autoarray/autoarray.cpp +) diff --git a/library/cpp/deprecated/autoarray/README.md b/library/cpp/deprecated/autoarray/README.md new file mode 100644 index 0000000000..1d83147cee --- /dev/null +++ b/library/cpp/deprecated/autoarray/README.md @@ -0,0 +1,3 @@ +Pre-C++11 vector-like container. + +Just use std::vector. If you need to fill your vector with custom-constructed data, use reserve+emplace_back (but make sure that your elements are movable). 
diff --git a/library/cpp/deprecated/autoarray/autoarray.cpp b/library/cpp/deprecated/autoarray/autoarray.cpp new file mode 100644 index 0000000000..15167f27f6 --- /dev/null +++ b/library/cpp/deprecated/autoarray/autoarray.cpp @@ -0,0 +1 @@ +#include "autoarray.h" diff --git a/library/cpp/deprecated/autoarray/autoarray.h b/library/cpp/deprecated/autoarray/autoarray.h new file mode 100644 index 0000000000..2aa12c5916 --- /dev/null +++ b/library/cpp/deprecated/autoarray/autoarray.h @@ -0,0 +1,264 @@ +#pragma once + +#include <util/system/compat.h> +#include <util/system/yassert.h> +#include <util/system/defaults.h> +#include <util/system/sys_alloc.h> + +#include <util/generic/typetraits.h> +#include <utility> + +#include <new> +#include <util/generic/noncopyable.h> + +struct autoarray_getindex { + autoarray_getindex() = default; +}; + +struct aarr_b0 { + aarr_b0() = default; +}; + +struct aarr_nofill { + aarr_nofill() = default; +}; + +template <typename T> +struct ynd_type_traits { + enum { + empty_destructor = TTypeTraits<T>::IsPod, + }; +}; + +template <class T> +class autoarray : TNonCopyable { +protected: + T* arr; + size_t _size; + +private: + void AllocBuf(size_t siz) { + arr = nullptr; + _size = 0; + if (siz) { + arr = (T*)y_allocate(sizeof(T) * siz); + _size = siz; + } + } + +public: + using value_type = T; + using iterator = T*; + using const_iterator = const T*; + + autoarray() + : arr(nullptr) + , _size(0) + { + } + autoarray(size_t siz) { + AllocBuf(siz); + T* curr = arr; + try { + for (T* end = arr + _size; curr != end; ++curr) + new (curr) T(); + } catch (...) { + for (--curr; curr >= arr; --curr) + curr->~T(); + y_deallocate(arr); + throw; + } + } + template <class A> + explicit autoarray(size_t siz, A& fill) { + AllocBuf(siz); + T* curr = arr; + try { + for (T* end = arr + _size; curr != end; ++curr) + new (curr) T(fill); + } catch (...) 
{ + for (--curr; curr >= arr; --curr) + curr->~T(); + y_deallocate(arr); + throw; + } + } + explicit autoarray(size_t siz, autoarray_getindex) { + AllocBuf(siz); + size_t nCurrent = 0; + try { + for (nCurrent = 0; nCurrent < _size; ++nCurrent) + new (&arr[nCurrent]) T(nCurrent); + } catch (...) { + for (size_t n = 0; n < nCurrent; ++n) + arr[n].~T(); + y_deallocate(arr); + throw; + } + } + explicit autoarray(size_t siz, aarr_b0) { + AllocBuf(siz); + memset(arr, 0, _size * sizeof(T)); + } + explicit autoarray(size_t siz, aarr_nofill) { + AllocBuf(siz); + } + template <class A> + explicit autoarray(const A* fill, size_t siz) { + AllocBuf(siz); + size_t nCurrent = 0; + try { + for (nCurrent = 0; nCurrent < _size; ++nCurrent) + new (&arr[nCurrent]) T(fill[nCurrent]); + } catch (...) { + for (size_t n = 0; n < nCurrent; ++n) + arr[n].~T(); + y_deallocate(arr); + throw; + } + } + template <class A, class B> + explicit autoarray(const A* fill, const B* cfill, size_t siz) { + AllocBuf(siz); + size_t nCurrent = 0; + try { + for (nCurrent = 0; nCurrent < _size; ++nCurrent) + new (&arr[nCurrent]) T(fill[nCurrent], cfill); + } catch (...) { + for (size_t n = 0; n < nCurrent; ++n) + arr[n].~T(); + y_deallocate(arr); + throw; + } + } + template <class A> + explicit autoarray(const A* fill, size_t initsiz, size_t fullsiz) { + AllocBuf(fullsiz); + size_t nCurrent = 0; + try { + for (nCurrent = 0; nCurrent < ((initsiz < _size) ? initsiz : _size); ++nCurrent) + new (&arr[nCurrent]) T(fill[nCurrent]); + for (; nCurrent < _size; ++nCurrent) + new (&arr[nCurrent]) T(); + } catch (...) { + for (size_t n = 0; n < nCurrent; ++n) + arr[n].~T(); + y_deallocate(arr); + throw; + } + } + template <class A> + explicit autoarray(const A* fill, size_t initsiz, size_t fullsiz, const T& dummy) { + AllocBuf(fullsiz); + size_t nCurrent = 0; + try { + for (nCurrent = 0; nCurrent < ((initsiz < _size) ? 
initsiz : _size); ++nCurrent) + new (&arr[nCurrent]) T(fill[nCurrent]); + for (; nCurrent < _size; ++nCurrent) + new (&arr[nCurrent]) T(dummy); + } catch (...) { + for (size_t n = 0; n < nCurrent; ++n) + arr[n].~T(); + y_deallocate(arr); + throw; + } + } + + template <class... R> + explicit autoarray(size_t siz, R&&... fill) { + AllocBuf(siz); + T* curr = arr; + try { + for (T* end = arr + _size; curr != end; ++curr) + new (curr) T(std::forward<R>(fill)...); + } catch (...) { + for (--curr; curr >= arr; --curr) + curr->~T(); + y_deallocate(arr); + throw; + } + } + ~autoarray() { + if (_size) { + if (!ynd_type_traits<T>::empty_destructor) + for (T *curr = arr, *end = arr + _size; curr != end; ++curr) + curr->~T(); + y_deallocate(arr); + } + } + T& operator[](size_t pos) { + Y_ASSERT(pos < _size); + return arr[pos]; + } + const T& operator[](size_t pos) const { + Y_ASSERT(pos < _size); + return arr[pos]; + } + size_t size() const { + return _size; + } + void swap(autoarray& with) { + T* tmp_arr = arr; + size_t tmp_size = _size; + arr = with.arr; + _size = with._size; + with.arr = tmp_arr; + with._size = tmp_size; + } + void resize(size_t siz) { + autoarray<T> tmp(arr, _size, siz); + swap(tmp); + } + void resize(size_t siz, const T& dummy) { + autoarray<T> tmp(arr, _size, siz, dummy); + swap(tmp); + } + T* rawpointer() { + return arr; + } + const T* operator~() const { + return arr; + } + T* begin() { + return arr; + } + T* end() { + return arr + _size; + } + T& back() { + Y_ASSERT(_size); + return arr[_size - 1]; + } + bool empty() const { + return !_size; + } + bool operator!() const { + return !_size; + } + size_t operator+() const { + return _size; + } + const T* begin() const { + return arr; + } + const T* end() const { + return arr + _size; + } + const T& back() const { + Y_ASSERT(_size); + return arr[_size - 1]; + } + //operator T*() { return arr; } +}; + +template <class T> +inline bool operator==(const autoarray<T>& a, const autoarray<T>& b) { + size_t count 
= a.size(); + if (count != b.size()) + return false; + for (size_t i = 0; i < count; ++i) { + if (a[i] != b[i]) + return false; + } + return true; +} diff --git a/library/cpp/deprecated/autoarray/ya.make b/library/cpp/deprecated/autoarray/ya.make new file mode 100644 index 0000000000..4b055f8c29 --- /dev/null +++ b/library/cpp/deprecated/autoarray/ya.make @@ -0,0 +1,7 @@ +LIBRARY() + +SRCS( + autoarray.cpp +) + +END() diff --git a/library/cpp/deprecated/datafile/CMakeLists.darwin-x86_64.txt b/library/cpp/deprecated/datafile/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..3f88f788da --- /dev/null +++ b/library/cpp/deprecated/datafile/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-deprecated-datafile) +target_link_libraries(cpp-deprecated-datafile PUBLIC + contrib-libs-cxxsupp + yutil + cpp-deprecated-mapped_file +) +target_sources(cpp-deprecated-datafile PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/datafile.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/loadmode.cpp +) diff --git a/library/cpp/deprecated/datafile/CMakeLists.linux-aarch64.txt b/library/cpp/deprecated/datafile/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..43da9ae45a --- /dev/null +++ b/library/cpp/deprecated/datafile/CMakeLists.linux-aarch64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-deprecated-datafile) +target_link_libraries(cpp-deprecated-datafile PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-deprecated-mapped_file +) +target_sources(cpp-deprecated-datafile PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/datafile.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/loadmode.cpp +) diff --git a/library/cpp/deprecated/datafile/CMakeLists.linux-x86_64.txt b/library/cpp/deprecated/datafile/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..43da9ae45a --- /dev/null +++ b/library/cpp/deprecated/datafile/CMakeLists.linux-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-deprecated-datafile) +target_link_libraries(cpp-deprecated-datafile PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-deprecated-mapped_file +) +target_sources(cpp-deprecated-datafile PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/datafile.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/loadmode.cpp +) diff --git a/library/cpp/deprecated/datafile/CMakeLists.txt b/library/cpp/deprecated/datafile/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/deprecated/datafile/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/deprecated/datafile/CMakeLists.windows-x86_64.txt b/library/cpp/deprecated/datafile/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..3f88f788da --- /dev/null +++ b/library/cpp/deprecated/datafile/CMakeLists.windows-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-deprecated-datafile) +target_link_libraries(cpp-deprecated-datafile PUBLIC + contrib-libs-cxxsupp + yutil + cpp-deprecated-mapped_file +) +target_sources(cpp-deprecated-datafile PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/datafile.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/datafile/loadmode.cpp +) diff --git a/library/cpp/deprecated/datafile/README.md b/library/cpp/deprecated/datafile/README.md new file mode 100644 index 0000000000..7f8547108e --- /dev/null +++ b/library/cpp/deprecated/datafile/README.md @@ -0,0 +1,3 @@ +A wrapper on top of some user-defined custom file format. + +Just write your own if you need it. It's going to be way easier than figuring out how to use this one. diff --git a/library/cpp/deprecated/datafile/datafile.cpp b/library/cpp/deprecated/datafile/datafile.cpp new file mode 100644 index 0000000000..ff93f11c6b --- /dev/null +++ b/library/cpp/deprecated/datafile/datafile.cpp @@ -0,0 +1,42 @@ +#include "datafile.h" + +// Loads the whole file `fname` (no header handling); calls Destroy() first. +void TDataFileBase::DoLoad(const char* fname, int loadMode) { + Destroy(); + TFile f(fname, RdOnly); + DoLoad(f, loadMode, nullptr, 0); +} + +// Loads `f` either into MemData (DLM_READ) or through the FileData mapping (any other load type). +// With a non-null hdrPtr, Length must already hold the size announced by the header; in DLM_READ +// mode the first hdrSize bytes are then copied from hdrPtr and only the remainder is read from `f`. +// With a null hdrPtr the whole file is loaded and hdrSize is ignored. +// Throws yexception when the file size contradicts the header value. 
+void TDataFileBase::DoLoad(TFile& f, int loadMode, void* hdrPtr, size_t hdrSize) { + if (hdrPtr) { + if (loadMode & DLM_EXACT_SIZE && f.GetLength() != (i64)Length) + throw yexception() << f.GetName() << " size does not match its header value"; + } else { + Length = f.GetLength(); + hdrSize = 0; + } + if ((loadMode & DLM_LD_TYPE_MASK) == DLM_READ) { + // A corrupt header may announce a size below the header itself; without this check the + // memcpy below overruns MemData and Length - hdrSize wraps around. + if (Length < hdrSize) + throw yexception() << f.GetName() << " header value is smaller than the header itself"; + MemData = TVector<char>(Length); + // hdrPtr is null in the whole-file case; memcpy must not be handed a null source. + if (hdrSize) + memcpy(MemData.begin(), hdrPtr, hdrSize); + f.Load(MemData.begin() + hdrSize, Length - hdrSize); + Start = MemData.begin(); + } else { + FileData.init(f); + if (FileData.getSize() < Length) + throw yexception() << f.GetName() << " is smaller than what its header value says"; + if ((loadMode & DLM_LD_TYPE_MASK) == DLM_MMAP_PRC) + FileData.precharge(); + Start = (const char*)FileData.getData(); + } +} + +void TDataFileBase::Destroy() { +
TVector<char>().swap(MemData); + FileData.term(); + Start = nullptr; + Length = 0; +} + +void TDataFileBase::Precharge() const { + if (Length && Start == (char*)FileData.getData()) + FileData.precharge(); +} diff --git a/library/cpp/deprecated/datafile/datafile.h b/library/cpp/deprecated/datafile/datafile.h new file mode 100644 index 0000000000..a438baceca --- /dev/null +++ b/library/cpp/deprecated/datafile/datafile.h @@ -0,0 +1,88 @@ +#pragma once + +#include "loadmode.h" + +#include <library/cpp/deprecated/mapped_file/mapped_file.h> + +#include <util/generic/vector.h> +#include <util/system/file.h> +#include <util/system/filemap.h> + +/** Simple helper that allows a file to be either mapped or read into malloc'ed memory. + This behaviour is controlled by EDataLoadMode enum defined in loadmode.h. + Unlike TBlob it provides Precharge() function and simple file size - based integrity check. + + To use this code, inherit your class from TDataFile<TFileHeader>. + TFileHeader must be a pod-type structure with byte layout of the file header. + File must start with that header. + TFileHeader must have FileSize() member function that determines expected file size or + length of data that need to be read from the beginning of file. 
+ */ + +class TDataFileBase { +protected: + TVector<char> MemData; + TMappedFile FileData; + + const char* Start; + size_t Length; + + TDataFileBase() + : Start(nullptr) + , Length(0) + { + } + + void DoLoad(TFile& f, int loadMode, void* hdrPtr, size_t hdrSize); + void DoLoad(const char* fname, int loadMode); // just whole file + void Destroy(); + void swap(TDataFileBase& with) { + MemData.swap(with.MemData); + FileData.swap(with.FileData); + DoSwap(Start, with.Start); + DoSwap(Length, with.Length); + } + +public: + void Precharge() const; +}; + +template <class TFileHeader> +class TDataFile: public TDataFileBase { +protected: + void Load(const char* fname, EDataLoadMode loadMode) { + Destroy(); + TFile f(fname, RdOnly | Seq); + TFileHeader hdr; + f.Load(&hdr, sizeof(hdr)); + Length = hdr.FileSize(); + DoLoad(f, (int)loadMode, &hdr, sizeof(hdr)); + } + const TFileHeader& Hdr() const { + return *(TFileHeader*)Start; + } +}; + +// Use: class TFoo: public TDataFileEx<Foo> {...}; +// Additional requrement: TFileHeader must have Validate(fname) function that throws exception. 
+// Class TUser itself must have Init(fname) function +// Adds Load() function to your class (TUser) +template <class TUser, class TFileHeader> +class TDataFileEx: public TDataFile<TFileHeader> { +private: + using TBase = TDataFile<TFileHeader>; + // Casts this object to the user class; only meaningful if TUser derives from this template. + TUser& User() const { + return *(TUser*)this; + } + +public: + // Loads fname right away unless it is null. + // NOTE(review): Load() ends up calling User().Init() from this base-class constructor, + // presumably before TUser's own constructor has run - confirm TUser::Init tolerates that. + TDataFileEx(const char* fname, EDataLoadMode loadMode = DLM_DEFAULT) { + if (fname) + Load(fname, loadMode); + } + // Loads the file, then calls Validate(fname) on its header and Init(fname) on the user class. + void Load(const char* fname, EDataLoadMode loadMode = DLM_DEFAULT) { + TBase::Load(fname, loadMode); + TBase::Hdr().Validate(fname); + User().Init(fname); + } +}; diff --git a/library/cpp/deprecated/datafile/loadmode.cpp b/library/cpp/deprecated/datafile/loadmode.cpp new file mode 100644 index 0000000000..a857830326 --- /dev/null +++ b/library/cpp/deprecated/datafile/loadmode.cpp @@ -0,0 +1 @@ +#include "loadmode.h" diff --git a/library/cpp/deprecated/datafile/loadmode.h b/library/cpp/deprecated/datafile/loadmode.h new file mode 100644 index 0000000000..f04054dd64 --- /dev/null +++ b/library/cpp/deprecated/datafile/loadmode.h @@ -0,0 +1,20 @@ +#pragma once + +// It is recommended to support all reasonable value combinations via this enum, +// to let Load() function argument be of EDataLoadMode type, not just int type + +enum EDataLoadMode { + DLM_READ = 0, + DLM_MMAP_PRC = 1, // precharge + DLM_MMAP = 2, // w/o precharge + DLM_MMAP_AUTO_PRC = 3, // precharge automatically (same as DLM_MMAP unless specifically supported) + DLM_LD_TYPE_MASK = 15, + DLM_EXACT_SIZE = 16, // fail if input file size differs from what header says + + 
DLM_READ_ESZ = DLM_READ | DLM_EXACT_SIZE, + DLM_MMAP_PRC_ESZ = DLM_MMAP_PRC | DLM_EXACT_SIZE, + DLM_MMAP_ESZ = DLM_MMAP | DLM_EXACT_SIZE, + DLM_MMAP_APRC_ESZ = DLM_MMAP_AUTO_PRC | DLM_EXACT_SIZE, + + DLM_DEFAULT = DLM_MMAP_PRC_ESZ, +}; diff --git a/library/cpp/deprecated/datafile/ya.make b/library/cpp/deprecated/datafile/ya.make new file mode 100644 index 0000000000..1ad4fe9bc7 --- /dev/null +++
b/library/cpp/deprecated/datafile/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( + datafile.cpp + loadmode.cpp +) + +PEERDIR( + library/cpp/deprecated/mapped_file +) + +END() diff --git a/library/cpp/deprecated/fgood/CMakeLists.darwin-x86_64.txt b/library/cpp/deprecated/fgood/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..a82750e559 --- /dev/null +++ b/library/cpp/deprecated/fgood/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-deprecated-fgood) +target_link_libraries(cpp-deprecated-fgood PUBLIC + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-deprecated-fgood PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/ffb.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/fgood.cpp +) diff --git a/library/cpp/deprecated/fgood/CMakeLists.linux-aarch64.txt b/library/cpp/deprecated/fgood/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..52e29348fd --- /dev/null +++ b/library/cpp/deprecated/fgood/CMakeLists.linux-aarch64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-deprecated-fgood) +target_link_libraries(cpp-deprecated-fgood PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-deprecated-fgood PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/ffb.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/fgood.cpp +) diff --git a/library/cpp/deprecated/fgood/CMakeLists.linux-x86_64.txt b/library/cpp/deprecated/fgood/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..52e29348fd --- /dev/null +++ b/library/cpp/deprecated/fgood/CMakeLists.linux-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-deprecated-fgood) +target_link_libraries(cpp-deprecated-fgood PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-deprecated-fgood PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/ffb.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/fgood.cpp +) diff --git a/library/cpp/deprecated/fgood/CMakeLists.txt b/library/cpp/deprecated/fgood/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/deprecated/fgood/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/deprecated/fgood/CMakeLists.windows-x86_64.txt b/library/cpp/deprecated/fgood/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..a82750e559 --- /dev/null +++ b/library/cpp/deprecated/fgood/CMakeLists.windows-x86_64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-deprecated-fgood) +target_link_libraries(cpp-deprecated-fgood PUBLIC + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-deprecated-fgood PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/ffb.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/fgood/fgood.cpp +) diff --git a/library/cpp/deprecated/fgood/README.md b/library/cpp/deprecated/fgood/README.md new file mode 100644 index 0000000000..4f66289657 --- /dev/null +++ b/library/cpp/deprecated/fgood/README.md @@ -0,0 +1,15 @@ +Some ancient wrappers on top of FILE*, and some string manipulation functions. + +Alternatives are as follows. + +For TFILEPtr. Use TIFStream or TOFStream if you need IO. For some rare use cases a TFileMap might also do. 
+ +For fput/fget/getline. Use streams API. + +For struct ffb and struct prnstr. Just don't use them. Even if you can figure out what they do. + +For sf family of functions and TLineSplitter. Just use Split* from util/string/split.h + +For TSFReader. Use TMapTsvFile. + +For read_or_die family of functions. Use streams API. diff --git a/library/cpp/deprecated/fgood/ffb.cpp b/library/cpp/deprecated/fgood/ffb.cpp new file mode 100644 index 0000000000..aa9da861a6 --- /dev/null +++ b/library/cpp/deprecated/fgood/ffb.cpp @@ -0,0 +1,407 @@ +#include "ffb.h" + +#include <util/string/util.h> // str_spn +#include <util/system/compat.h> +#include <util/generic/yexception.h> + +#include <cstdio> +#include <algorithm> + +#include <ctype.h> + +#ifdef _win_ +#include <io.h> +#else +#include <unistd.h> +#endif + +ffb::ffb(FILE* file) + : TFILEPtr(file) +{ + if (file && !isatty(fileno(file)) && BUFSIZ < 512 * 1024) + setvbuf(file, nullptr, _IOFBF, 512 * 1024); +} + +void ffb::operator=(FILE* f) { + TFILEPtr::operator=(f); + if (f && !isatty(fileno(f)) && BUFSIZ < 512 * 1024) + setvbuf(f, nullptr, _IOFBF, 512 * 1024); +} + +void ffb::open(const char* name, const char* mode) { + TFILEPtr::open(name, mode); + if (!isatty(fileno(*this)) && BUFSIZ < 512 * 1024) + setvbuf(*this, nullptr, _IOFBF, 512 * 1024); +} + +int sf(char** fb, char* buf) { //don't want to call sf(fb, buf, 32) + if (!(*buf && *buf != 10)) { + *fb = nullptr; + return 0; + } + int n = 1; + fb[0] = buf; + while (*buf && *buf != 10 && n < 31) { + if (*buf == '\t') { + *buf++ = 0; + fb[n++] = buf; + continue; + } + buf++; + } + if (*buf == 10 && buf[-1] == 13) + buf[-1] = 0; + *buf = 0; + fb[n] = nullptr; + return n; +} + +int sf(char** fb, char* buf, size_t fb_sz) { + if (!(*buf && *buf != 10)) { + *fb = nullptr; + return 0; + } + fb_sz--; + int n = 1; + fb[0] = buf; + while (*buf && *buf != 10 && n < (int)fb_sz) { + if (*buf == '\t') { + *buf++ = 0; + fb[n++] = buf; + continue; + } + buf++; + } + if (*buf == 10 && 
buf[-1] == 13) + buf[-1] = 0; + *buf = 0; + fb[n] = nullptr; + return n; +} + +inline int sf_blank(char** fb, char* buf, size_t fb_sz) { + while (isspace((ui8)*buf)) + buf++; + if (!*buf) { + *fb = nullptr; + return 0; + } + fb_sz--; + int n = 1; + fb[0] = buf; + while (*buf && *buf != 10 && n < (int)fb_sz) { + if (isspace((ui8)*buf)) { + *buf++ = 0; + while (isspace((ui8)*buf)) + buf++; + if (*buf) + fb[n++] = buf; + continue; + } + buf++; + } + if (*buf == 10 && buf[-1] == 13) + buf[-1] = 0; + *buf = 0; + fb[n] = nullptr; + return n; +} + +int sf(char fs, char** fb, char* buf, size_t fb_sz) { + if (fs == ' ') + return sf_blank(fb, buf, fb_sz); + while (*buf == fs) + buf++; + if (!(*buf && *buf != 10)) { + *fb = nullptr; + return 0; + } + fb_sz--; + int n = 1; + fb[0] = buf; + while (*buf && *buf != 10 && n < (int)fb_sz) { + if (*buf == fs) { + *buf++ = 0; + while (*buf == fs) + buf++; + fb[n++] = buf; + continue; + } + buf++; + } + if (*buf == 10 && buf[-1] == 13) + buf[-1] = 0; + *buf = 0; + fb[n] = nullptr; + return n; +} + +int sf(const char* fs, char** fb, char* buf, size_t fb_sz) { + if (!(*buf && *buf != 10)) { + *fb = nullptr; + return 0; + } + int fs_len = strlen(fs); + fb_sz--; + int n = 1; + fb[0] = buf; + while (*buf && *buf != 10 && n < (int)fb_sz) { + if (*buf == *fs && !strncmp(buf + 1, fs + 1, fs_len - 1)) { + *buf = 0; + buf += fs_len; + fb[n++] = buf; + continue; + } + buf++; + } + if (*buf == 10 && buf[-1] == 13) + buf[-1] = 0; + *buf = 0; + fb[n] = nullptr; + return n; +} + +inline bool is_end(const char* p) { + return !p || !p[0]; +} + +int sf(const char* seps, char* buf, char** fb, size_t fb_sz) { + if (fb_sz < 1 || is_end(buf)) { + *fb = nullptr; + return 0; + } + str_spn sseps(seps); + fb[0] = nullptr; + int n = 0; + // skip leading delimeters + buf = sseps.cbrk(buf); + if (is_end(buf)) + return 0; + // store fields + while (n < (int)fb_sz) { + fb[n++] = buf; + // find delimeters + buf = sseps.brk(buf + 1); + if (is_end(buf)) + break; + 
*buf = 0; + // skip delimiters + buf = sseps.cbrk(buf + 1); + if (is_end(buf)) + break; + } + fb[n] = nullptr; + return n; +} + +void TLineSplitter::operator()(char* p, TVector<char*>& fields) const { + if (!p || !*p) + return; + char* q = p; + while (1) { + p = Sep.brk(p); + if (q && (p - q || !SkipEmpty())) + fields.push_back(q); + q = nullptr; + if (!*p) + break; + if (SepStrLen == 1 || (SepStrLen > 1 && !strncmp(p + 1, SepStr + 1, SepStrLen - 1))) { + *p = 0; + p += SepStrLen; + q = p; + } else + p++; + } +} + +void TLineSplitter::operator()(const char* p, TVector<std::pair<const char*, size_t>>& fields) const { + if (!p || !*p) + return; + const char* q = p; + while (1) { + p = Sep.brk(p); + if (q && (p - q || !SkipEmpty())) + fields.push_back(std::make_pair(q, p - q)); + q = nullptr; + if (!*p) + break; + if (SepStrLen == 1 || (SepStrLen > 1 && !strncmp(p + 1, SepStr + 1, SepStrLen - 1))) { + p += SepStrLen; + q = p; + } else + p++; + } +} + +TSFReader::TSFReader(const char* fname, char sep, i32 nfrq) // if sep == ' ' isspace will be imitated (for compat) + : Split(str_spn(sep == ' ' ? 
"\t\n\v\f\r " : TString(1, sep).data()), sep == ' ') + , OpenPipe(false) +{ + Open(fname, nfrq); +} + +TSFReader::TSFReader(const char* fname, const char* sep, i32 nfrq) + : Split(sep, false) + , OpenPipe(false) +{ + Open(fname, nfrq); +} + +TSFReader::TSFReader(const char* fname, const TLineSplitter& spl, i32 nfrq) + : Split(spl) + , OpenPipe(false) +{ + Open(fname, nfrq); +} + +void TSFReader::Open(const char* fname, i32 nfrq, size_t vbuf_size) { + FieldsRequired = nfrq; + NF = NR = 0; + + if (IsOpen()) + File.close(); + + if (!fname) + return; + + if (!strcmp(fname, "/dev/stdin")) { + File.assign(stdin, "/dev/stdin"); + } else { + if (OpenPipe) + File.popen(fname, "r"); + else + File.open(fname, "r"); + } + OpenPipe = false; + if (!isatty(fileno(File))) + setvbuf(File, nullptr, _IOFBF, vbuf_size); +} + +void TSFReader::Popen(const char* pname, i32 nfrq, size_t vbuf_size) { + OpenPipe = true; + Open(pname, nfrq, vbuf_size); +} + +bool TSFReader::NextLine(segmented_string_pool* pool) { + size_t line_len = 0; + +#ifdef __FreeBSD__ + char* ptr = fgetln(File, &line_len); + if (!ptr) + return false; + if (!line_len || ptr[line_len - 1] != '\n') { // last line w/o newline + Buf.AssignNoAlias(ptr, line_len); + ptr = Buf.begin(); + } else { + // can safely replace newline with \0 + ptr[line_len - 1] = 0; + --line_len; + } +#else + if (!getline(File, Buf)) + return false; + char* ptr = Buf.begin(); + line_len = Buf.size(); +#endif + if (line_len && ptr[line_len - 1] == '\r') + ptr[line_len - 1] = 0; + + if (pool) { + char* nptr = pool->append(ptr); + Y_ASSERT(!strcmp(ptr, nptr)); + ptr = nptr; + } + + ++NR; + Fields.clear(); + Split(ptr, Fields); + NF = Fields.size(); + + if (FieldsRequired != -1 && FieldsRequired != (int)NF) + ythrow yexception() << File.name() << " line " << NR << ": " << NF << " fields, expected " << FieldsRequired; + + return true; +} + +int prnstr::f(const char* c, ...) 
{ + va_list params; + int n = asize - pos, k; + va_start(params, c); + while ((k = vsnprintf(buf + pos, n, c, params)) >= n) { + n += asize, asize *= 2; + while (k + pos >= n) + n += asize, asize *= 2; + char* t = new char[asize]; + memcpy(t, buf, pos); + delete[] buf; + buf = t; + va_end(params); + va_start(params, c); + } + pos += k; + va_end(params); + return k; +} +int prnstr::s(const char* c, size_t k) { + if (!c) + return 0; + size_t n = asize - pos; + if (k >= n) { + n += asize, asize *= 2; + while (k + pos >= n) + n += asize, asize *= 2; + char* t = new char[asize]; + memcpy(t, buf, pos); + delete[] buf; + buf = t; + } + memcpy(buf + pos, c, k); + pos += k; + buf[pos] = 0; + return k; +} +void prnstr::clear() { + pos = 0; + if (asize > 32768) { + asize = 32768; + delete[] buf; + buf = new char[asize]; + } +} + +void prnstr::swap(prnstr& w) { + std::swap(buf, w.buf); + std::swap(pos, w.pos); + std::swap(asize, w.asize); +} + +FILE* read_or_die(const char* fname) { + FILE* f = fopen(fname, "rb"); + if (!f) + err(1, "%s", fname); + return f; +} +FILE* write_or_die(const char* fname) { + FILE* f = fopen(fname, "wb"); + if (!f) + err(1, "%s", fname); + return f; +} +FILE* fopen_or_die(const char* fname, const char* mode) { + FILE* f = fopen(fname, mode); + if (!f) + err(1, "%s (mode '%s')", fname, mode); + return f; +} + +FILE* fopen_chk(const char* fname, const char* mode) { + FILE* f = fopen(fname, mode); + if (!f) + ythrow yexception() << fname << " (mode '" << mode << "'): " << LastSystemErrorText(); + return f; +} + +void fclose_chk(FILE* f, const char* fname) { + if (fclose(f)) + ythrow yexception() << "file " << fname << ": " << LastSystemErrorText(); +} diff --git a/library/cpp/deprecated/fgood/ffb.h b/library/cpp/deprecated/fgood/ffb.h new file mode 100644 index 0000000000..ca229eb65a --- /dev/null +++ b/library/cpp/deprecated/fgood/ffb.h @@ -0,0 +1,264 @@ +#pragma once + +#include "fgood.h" + +#include <util/string/util.h> // str_spn +#include 
<util/string/split.h> // str_spn +#include <util/memory/segmented_string_pool.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/generic/noncopyable.h> + +#include <utility> + +#include <cstdarg> +#include <cstring> + +struct ffb: public TFILEPtr { + ffb() { + } + ffb(FILE* file); + ffb(const char* name, const char* mode) { + open(name, mode); + } + void operator=(FILE* f); // take ownership + void open(const char* name, const char* mode); + int f(const char* c, ...) { + va_list args; + va_start(args, c); + return vfprintf(*this, c, args); + } + void s(const char* c) { + fsput(c, strlen(c)); + } + void b(const void* cc, int n) { + fsput((const char*)cc, n); + } + void B(const void* cc, int N) { + fsput((const char*)cc, N); + } + void c(char c) { + fputc(c); + } + void cbe(wchar16 c) { // big endian utf-16 + fputc(char(c >> 8)); //Hi8 + fputc(char(c & 255)); //Lo8 + } + void sbe(const wchar16* c) { + for (; *c; c++) + cbe(*c); + } + void fclose() { + close(); + } +}; + +// split fields of tab-delimited line of text +// here and below fb actual size must be fb_sz + 1 to allow fb[fb_sz] be zero +int sf(char** fb, char* buf, size_t fb_sz); +int sf(char** fb, char* buf /* fb_sz == 32 */); + +// split fields of char-delimited line of text +// Achtung: delim = ' ' imitates awk: initial separators are skipped, +// repeated seps treated as one, all chars less than ' ' treated as separators. +int sf(char fs, char** fb, char* buf, size_t fb_sz = 32); + +// split fields of string-delimited line of text (fs is NOT a regexp) +// (usually fs is "@@") +int sf(const char* fs, char** fb, char* buf, size_t fb_sz = 32); + +// split fields of char-delimited line of text, set of char-separators is given +// Achtung: repeated seps treated as one, initial seps are skipped +// newlines are NOT ignored. 
+int sf(const char* seps, char* buf, char** fb, size_t fb_sz = 32); + +inline char* chomp(char* buf) { + char* c = buf + strlen(buf); + if (c > buf && c[-1] == '\n') { + *--c = 0; +#ifdef _win32_ + if (c > buf && c[-1] == '\r') + *--c = 0; +#endif + } + return buf; +} + +inline char* chomp_cr(char* buf) { + char* c = buf + strlen(buf); + if (c > buf && c[-1] == '\n') + *--c = 0; + if (c > buf && c[-1] == '\r') + *--c = 0; + return buf; +} + +class TLineSplitter { +protected: + enum { // Default: Split string by SepStr + SplitByAnySep = 1, // Split string by Sep + NoEmptyFields = 2 // Skip all empty fields between separators + }; + +private: + ui32 Flags; + const str_spn Sep; // collection of separators + const char* SepStr; // pointer exact string to separate by + size_t SepStrLen; // length of separator string + +public: + TLineSplitter(const char* sep, bool noEmpty) + : Flags(noEmpty ? NoEmptyFields : 0) + , Sep(TString(sep, 1).data()) + , SepStr(sep) + , SepStrLen(strlen(sep)) + { + } + TLineSplitter(const str_spn& sep, bool noEmpty = false) + : Flags(SplitByAnySep | (noEmpty ? NoEmptyFields : 0)) + , Sep(sep) + , SepStr(nullptr) + , SepStrLen(1) + { + } + bool AnySep() const { + return Flags & SplitByAnySep; + } + bool SkipEmpty() const { + return Flags & NoEmptyFields; + } + /// Separates string onto tokens + /// Expecting a zero-terminated string + /// By default returns empty fields between sequential separators + void operator()(char* p, TVector<char*>& fields) const; + /// Same, but for const string - fills vector of pairs (pointer, length) + void operator()(const char* p, TVector<std::pair<const char*, size_t>>& fields) const; +}; + +/** + * Use library/cpp/map_text_file/map_tsv_file.h instead. 
+ */ +class TSFReader { + TString Buf; // buffer used for non-'\n'-terminated string and for non-freebsd work + TLineSplitter Split; + TVector<char*> Fields; + size_t NF; // Fields.size() + size_t NR; + + TFILEPtr File; + + bool OpenPipe; // internal flag that turns open() to popen() + + i32 FieldsRequired; // if != -1, != nf, terminate program + +public: + // char separator + // Achtung: delim = ' ' imitates awk: initial separators are skipped, + // all chars less than ' ' treated as separators. + TSFReader(const char* fname = nullptr, char sep = '\t', i32 nf_reqired = -1); + // exact string separator + TSFReader(const char* fname, const char* sep, i32 nf_reqired = -1); + // fully customizable + TSFReader(const char* fname, const TLineSplitter& spl, i32 nf_reqired = -1); + + void Open(const char* fname, i32 nf_reqired = -1, size_t vbufsize = 1u << 21); // use "/dev/stdin" for stdin + void Popen(const char* pname, i32 nf_reqired = -1, size_t vbufsize = 1u << 21); + + bool NextLine(segmented_string_pool* pool = nullptr); + + bool IsOpen() const { + return (FILE*)File != nullptr; + } + bool IsEof() const { + return feof(File); + } + void Close() { + File.close(); + } + void Rewind() { + File.seek(0, SEEK_SET); + } + void Seek(i64 offset, int mode = SEEK_SET) { + File.seek(offset, mode); + } + i64 Tell() const { + return ftell(File); + } + char*& operator[](size_t ind) { + //if (ind >= NF) + // throw yexception("Can't return reference to unexisting field %" PRISZT, ind); + return Fields[ind]; + } + const char* operator[](size_t ind) const { + if (ind >= NF) + return nullptr; + return Fields[ind]; + } + operator int() const { // note: empty input line makes 0 fields + return (int)NF; + } + const char* Name() const { + return File.name().data(); + } + size_t Line() const { + return NR; + } + const TVector<char*>& GetFields() const { + return Fields; + } +}; + +struct prnstr { + char* buf; + int pos; + int asize; + prnstr() + : pos(0) + { + asize = 32; + buf = new 
char[asize]; + } + explicit prnstr(int asz) + : pos(0) + { + asize = asz; + buf = new char[asize]; + } + int f(const char* c, ...); + int s(const char* c1, const char* c2); + int s(const char* c1, const char* c2, const char* c3); + int s(const char* c, size_t len); + //int s(const char *c); + int s(const char* c) { + return c ? s(c, strlen(c)) : 0; + } + int s(const TString& c); + int s_htmesc(const char* c, bool enc_utf = false); + int s_htmesc_w(const char* c); + int c(char c); + int cu(wchar32 c); //for utf-8 + void restart() { + *buf = 0; + pos = 0; + } + const char* operator~() const { + return buf; + } + int operator+() const { + return pos; + } + ~prnstr() { + delete[] buf; + } + void clear(); + void swap(prnstr& w); +}; + +// functions that terminate program upon failure +FILE* read_or_die(const char* fname); +FILE* write_or_die(const char* fname); +FILE* fopen_or_die(const char* fname, const char* mode); + +// functions that throw upon failure +FILE* fopen_chk(const char* fname, const char* mode); +void fclose_chk(FILE* f, const char* fname_dbg); diff --git a/library/cpp/deprecated/fgood/fgood.cpp b/library/cpp/deprecated/fgood/fgood.cpp new file mode 100644 index 0000000000..5d4725bfae --- /dev/null +++ b/library/cpp/deprecated/fgood/fgood.cpp @@ -0,0 +1,70 @@ +#include "fgood.h" + +#include <util/generic/cast.h> +#include <util/string/cast.h> +#include <util/system/fstat.h> + +#ifdef _win32_ +#include <io.h> +#endif + +i64 TFILEPtr::length() const { +#ifdef _win32_ + FHANDLE fd = (FHANDLE)_get_osfhandle(fileno(m_file)); +#else + FHANDLE fd = fileno(m_file); +#endif + i64 rv = GetFileLength(fd); + if (rv < 0) + ythrow yexception() << "TFILEPtr::length() " << Name.data() << ": " << LastSystemErrorText(); + return rv; +} + +FILE* OpenFILEOrFail(const TString& name, const char* mode) { + FILE* res = ::fopen(name.data(), mode); + if (!res) { + ythrow yexception() << "can't open \'" << name << "\' with mode \'" << mode << "\': " << LastSystemErrorText(); + } + 
return res; +} + +void TFILECloser::Destroy(FILE* file) { + ::fclose(file); +} + +#ifdef _freebsd_ // fgetln +#define getline getline_alt_4test +#endif // _freebsd_ + +bool getline(TFILEPtr& f, TString& s) { + char buf[4096]; + char* buf_ptr; + if (s.capacity() > sizeof(buf)) { + s.resize(s.capacity()); + if ((buf_ptr = fgets(s.begin(), IntegerCast<int>(s.capacity()), f)) == nullptr) + return false; + } else { + if ((buf_ptr = fgets(buf, sizeof(buf), f)) == nullptr) + return false; + } + size_t buf_len = strlen(buf_ptr); + bool line_complete = buf_len && buf_ptr[buf_len - 1] == '\n'; + if (line_complete) + buf_len--; + if (buf_ptr == s.begin()) + s.resize(buf_len); + else + s.AssignNoAlias(buf, buf_len); + if (line_complete) + return true; + while (fgets(buf, sizeof(buf), f)) { + size_t buf_len2 = strlen(buf); + if (buf_len2 && buf[buf_len2 - 1] == '\n') { + buf[buf_len2 - 1] = 0; + s.append(buf, buf_len2 - 1); + return true; + } + s.append(buf, buf_len2); + } + return true; +} diff --git a/library/cpp/deprecated/fgood/fgood.h b/library/cpp/deprecated/fgood/fgood.h new file mode 100644 index 0000000000..0aaf910c0f --- /dev/null +++ b/library/cpp/deprecated/fgood/fgood.h @@ -0,0 +1,328 @@ +#pragma once + +#include <util/system/yassert.h> +#include <util/system/defaults.h> +#include <util/generic/string.h> +#include <util/generic/yexception.h> +#include <util/generic/ptr.h> + +#include "fput.h" + +#include <cstdio> + +#include <fcntl.h> + +#ifdef _unix_ +extern "C" int __ungetc(int, FILE*); +#endif + +#if (!defined(__FreeBSD__) && !defined(__linux__) && !defined(_darwin_) && !defined(_cygwin_)) || defined(_bionic_) +#define feof_unlocked(_stream) feof(_stream) +#define ferror_unlocked(_stream) ferror(_stream) +#endif + +#ifndef _unix_ +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#define getc_unlocked(_stream) (--(_stream)->_cnt >= 0 ? 0xff & *(_stream)->_ptr++ : _filbuf(_stream)) +#define putc_unlocked(_c, _stream) (--(_stream)->_cnt >= 0 ? 
0xff & (*(_stream)->_ptr++ = (char)(_c)) : _flsbuf((_c), (_stream))) +#else +#define getc_unlocked(_stream) getc(_stream) +#define putc_unlocked(_c, _stream) putc(_c, _stream) +#endif +#endif + +inline bool fgood(FILE* f) { + return !feof_unlocked(f) && !ferror_unlocked(f); +} + +#ifdef _win32_ +// These functions will work only with static MSVC runtime linkage. For dynamic linkage, +// fseeki64.c and ftelli64.c from CRT sources should be included in project +extern "C" int __cdecl _fseeki64(FILE*, __int64, int); +extern "C" __int64 __cdecl _ftelli64(FILE*); + +inline i64 ftello(FILE* stream) { + return _ftelli64(stream); +} + +inline int fseeko(FILE* stream, i64 offset, int origin) { + return _fseeki64(stream, offset, origin); +} +#endif + +class TFILEPtr { +private: + enum { SHOULD_CLOSE = 1, + IS_PIPE = 2 }; + FILE* m_file; + int m_Flags; + TString Name; + +public: + TFILEPtr() noexcept { + m_file = nullptr; + m_Flags = 0; + } + TFILEPtr(const TString& name, const char* mode) { + m_file = nullptr; + m_Flags = 0; + open(name, mode); + } + TFILEPtr(const TFILEPtr& src) noexcept { + m_file = src.m_file; + m_Flags = 0; + } + TFILEPtr& operator=(const TFILEPtr& src) { + if (src.m_file != m_file) { + close(); + m_file = src.m_file; + m_Flags = 0; + } + return *this; + } + explicit TFILEPtr(FILE* f) noexcept { // take ownership + m_file = f; + m_Flags = SHOULD_CLOSE; + } + TFILEPtr& operator=(FILE* f) { // take ownership + if (f != m_file) { + close(); + m_file = f; + m_Flags = SHOULD_CLOSE; + } + return *this; + } + const TString& name() const { + return Name; + } + operator FILE*() const noexcept { + return m_file; + } + FILE* operator->() const noexcept { + return m_file; + } + bool operator!() const noexcept { + return m_file == nullptr; + } + bool operator!=(FILE* f) const noexcept { + return m_file != f; + } + bool operator==(FILE* f) const noexcept { + return m_file == f; + } + ~TFILEPtr() { + close(); + } + void Y_PRINTF_FORMAT(2, 3) check(const char* message, 
...) const { + if (Y_UNLIKELY(!fgood(m_file))) { + va_list args; + va_start(args, message); + char buf[512]; + vsnprintf(buf, 512, message, args); + // XXX: errno is undefined here + ythrow yexception() << buf << ": " << LastSystemErrorText() << ", " << Name.data() << " at offset " << (i64)ftell(); + } + } + TFILEPtr& assign(FILE* f, const char* name = nullptr) { // take ownership and have a name + *this = f; + if (name) + Name = name; + return *this; + } + void open(const TString& name, const char* mode) { + Y_ASSERT(!name.empty()); + Y_ASSERT(m_file == nullptr); + m_file = ::fopen(name.data(), mode); + if (!m_file) + ythrow yexception() << "can't open \'" << name << "\' with mode \'" << mode << "\': " << LastSystemErrorText(); + m_Flags = SHOULD_CLOSE; + Name = name; + } + void popen(const TString& command, const char* mode) { + Y_ASSERT(!command.empty()); + Y_ASSERT(m_file == nullptr); + m_file = ::popen(command.data(), mode); + if (!m_file) + ythrow yexception() << "can't execute \'" << command << "\' with mode \'" << mode << "\': " << LastSystemErrorText(); + m_Flags = IS_PIPE | SHOULD_CLOSE; + Name = command; + } + void close() { + if (m_file != nullptr && (m_Flags & SHOULD_CLOSE)) { + if ((m_Flags & IS_PIPE) ? 
::pclose(m_file) : ::fclose(m_file)) { + m_file = nullptr; + m_Flags = 0; + if (!UncaughtException()) + ythrow yexception() << "can't close file " << Name.data() << ": " << LastSystemErrorText(); + } + } + m_file = nullptr; + m_Flags = 0; + Name.clear(); + } + size_t write(const void* buffer, size_t size, size_t count) const { + Y_ASSERT(m_file != nullptr); + size_t r = ::fwrite(buffer, size, count, m_file); + check("can't write %lu bytes", (unsigned long)size * count); + return r; + } + size_t read(void* buffer, size_t size, size_t count) const { + Y_ASSERT(m_file != nullptr); + size_t r = ::fread(buffer, size, count, m_file); + if (ferror_unlocked(m_file)) + ythrow yexception() << "can't read " << (unsigned long)size * count << " bytes: " << LastSystemErrorText() << ", " << Name.data() << " at offset " << (i64)ftell(); + return r; + } + char* fgets(char* buffer, int size) const { + Y_ASSERT(m_file != nullptr); + char* r = ::fgets(buffer, size, m_file); + if (ferror_unlocked(m_file)) + ythrow yexception() << "can't read string of maximum size " << size << ": " << LastSystemErrorText() << ", " << Name.data() << " at offset " << (i64)ftell(); + return r; + } + void Y_PRINTF_FORMAT(2, 3) fprintf(const char* format, ...) 
{ + Y_ASSERT(m_file != nullptr); + va_list args; + va_start(args, format); + vfprintf(m_file, format, args); + check("can't write"); + } + void seek(i64 offset, int origin) const { + Y_ASSERT(m_file != nullptr); +#if defined(_unix_) || defined(_win32_) + if (fseeko(m_file, offset, origin) != 0) +#else + Y_ASSERT(offset == (i64)(i32)offset); + if (::fseek(m_file, (long)offset, origin) != 0) +#endif + ythrow yexception() << "can't seek " << Name.data() << " by " << offset << ": " << LastSystemErrorText(); + } + i64 length() const; // uses various system headers -> in fileptr.cpp + + void setDirect() const { +#if !defined(_win_) && !defined(_darwin_) + if (!m_file) + ythrow yexception() << "file not open"; + if (fcntl(fileno(m_file), F_SETFL, O_DIRECT) == -1) + ythrow yexception() << "Cannot set O_DIRECT flag"; +#endif + } + + // for convenience + + i64 ftell() const noexcept { +#if defined(_unix_) || defined(_win32_) + return ftello(m_file); +#else + return ftell(m_file); +#endif + } + bool eof() const noexcept { + Y_ASSERT(m_file != nullptr); + return feof_unlocked(m_file) != 0; + } + int fputc(int c) { + Y_ASSERT(m_file != nullptr); + return putc_unlocked(c, m_file); + } + size_t fputs(const char* buffer) const { + return write(buffer, strlen(buffer), 1); + } + int fgetc() { + Y_ASSERT(m_file != nullptr); + return getc_unlocked(m_file); + } + int ungetc(int c) { + Y_ASSERT(m_file != nullptr); + return ::ungetc(c, m_file); + } + template <class T> + size_t fput(const T& a) { + Y_ASSERT(m_file != nullptr); + return ::fput(m_file, a); + } + template <class T> + size_t fget(T& a) { + Y_ASSERT(m_file != nullptr); + return ::fget(m_file, a); + } + size_t fsput(const char* s, size_t l) { + Y_ASSERT(m_file != nullptr); + return ::fsput(m_file, s, l); + } + size_t fsget(char* s, size_t l) { + Y_ASSERT(m_file != nullptr); + return ::fsget(m_file, s, l); + } + + void fflush() { + ::fflush(m_file); + } + + /* This block contains some TFile/TStream - compatible names */ + 
size_t Read(void* bufferIn, size_t numBytes) { + size_t r = fsget((char*)bufferIn, numBytes); + if (Y_UNLIKELY(ferror_unlocked(m_file))) + ythrow yexception() << "can't read " << numBytes << " bytes: " << LastSystemErrorText() << ", " << Name << " at offset " << (i64)ftell(); + return r; + } + void Write(const void* buffer, size_t numBytes) { + write(buffer, 1, numBytes); + } + i64 Seek(i64 offset, int origin /*SeekDir*/) { + seek(offset, origin); + return ftell(); + } + i64 GetPosition() const noexcept { + return ftell(); + } + i64 GetLength() const noexcept { + return length(); + } + bool ReadLine(TString& st); + + /* Similar to TAutoPtr::Release - return pointer and forget about it. */ + FILE* Release() noexcept { + FILE* result = m_file; + m_file = nullptr; + m_Flags = 0; + Name.clear(); + return result; + } +}; + +inline void fclose(TFILEPtr& F) { + F.close(); +} + +inline void fseek(const TFILEPtr& F, i64 offset, int whence) { + F.seek(offset, whence); +} + +#ifdef _freebsd_ // fgetln +inline bool getline(TFILEPtr& f, TString& s) { + size_t len; + char* buf = fgetln(f, &len); + if (!buf) + return false; + if (len && buf[len - 1] == '\n') + len--; + s.AssignNoAlias(buf, len); + return true; +} +#else +bool getline(TFILEPtr& f, TString& s); +#endif //_freebsd_ + +inline bool TFILEPtr::ReadLine(TString& st) { + return getline(*this, st); +} + +FILE* OpenFILEOrFail(const TString& name, const char* mode); + +//Should be used with THolder +struct TFILECloser { + static void Destroy(FILE* file); +}; + +using TFILEHolder = THolder<FILE, TFILECloser>; diff --git a/library/cpp/deprecated/fgood/fput.h b/library/cpp/deprecated/fgood/fput.h new file mode 100644 index 0000000000..690b06332d --- /dev/null +++ b/library/cpp/deprecated/fgood/fput.h @@ -0,0 +1,79 @@ +#pragma once + +#include <util/system/defaults.h> +#include <util/system/valgrind.h> + +#include <cstdio> + +#ifdef __FreeBSD__ +#include <cstring> + +template <class T> +Y_FORCE_INLINE size_t fput(FILE* F, const 
T& a) { + if (Y_LIKELY(F->_w >= int(sizeof(a)))) { + memcpy(F->_p, &a, sizeof(a)); + F->_p += sizeof(a); + F->_w -= sizeof(a); + return 1; + } else { + return fwrite(&a, sizeof(a), 1, F); + } +} + +template <class T> +Y_FORCE_INLINE size_t fget(FILE* F, T& a) { + if (Y_LIKELY(F->_r >= int(sizeof(a)))) { + memcpy(&a, F->_p, sizeof(a)); + F->_p += sizeof(a); + F->_r -= sizeof(a); + return 1; + } else { + return fread(&a, sizeof(a), 1, F); + } +} + +inline size_t fsput(FILE* F, const char* s, size_t l) { + VALGRIND_CHECK_READABLE(s, l); + + if ((size_t)F->_w >= l) { + memcpy(F->_p, s, l); + F->_p += l; + F->_w -= l; + return l; + } else { + return fwrite(s, 1, l, F); + } +} + +inline size_t fsget(FILE* F, char* s, size_t l) { + if ((size_t)F->_r >= l) { + memcpy(s, F->_p, l); + F->_p += l; + F->_r -= l; + return l; + } else { + return fread(s, 1, l, F); + } +} +#else +template <class T> +Y_FORCE_INLINE size_t fput(FILE* F, const T& a) { + return fwrite(&a, sizeof(a), 1, F); +} + +template <class T> +Y_FORCE_INLINE size_t fget(FILE* F, T& a) { + return fread(&a, sizeof(a), 1, F); +} + +inline size_t fsput(FILE* F, const char* s, size_t l) { +#ifdef WITH_VALGRIND + VALGRIND_CHECK_READABLE(s, l); +#endif + return fwrite(s, 1, l, F); +} + +inline size_t fsget(FILE* F, char* s, size_t l) { + return fread(s, 1, l, F); +} +#endif diff --git a/library/cpp/deprecated/fgood/ya.make b/library/cpp/deprecated/fgood/ya.make new file mode 100644 index 0000000000..2394f9ad7a --- /dev/null +++ b/library/cpp/deprecated/fgood/ya.make @@ -0,0 +1,8 @@ +LIBRARY() + +SRCS( + ffb.cpp + fgood.cpp +) + +END() diff --git a/library/cpp/deprecated/mapped_file/CMakeLists.darwin-x86_64.txt b/library/cpp/deprecated/mapped_file/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..a00407491d --- /dev/null +++ b/library/cpp/deprecated/mapped_file/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-deprecated-mapped_file) +target_link_libraries(cpp-deprecated-mapped_file PUBLIC + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-deprecated-mapped_file PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/mapped_file/mapped_file.cpp +) diff --git a/library/cpp/deprecated/mapped_file/CMakeLists.linux-aarch64.txt b/library/cpp/deprecated/mapped_file/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..2bb5db017b --- /dev/null +++ b/library/cpp/deprecated/mapped_file/CMakeLists.linux-aarch64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-deprecated-mapped_file) +target_link_libraries(cpp-deprecated-mapped_file PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-deprecated-mapped_file PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/mapped_file/mapped_file.cpp +) diff --git a/library/cpp/deprecated/mapped_file/CMakeLists.linux-x86_64.txt b/library/cpp/deprecated/mapped_file/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..2bb5db017b --- /dev/null +++ b/library/cpp/deprecated/mapped_file/CMakeLists.linux-x86_64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-deprecated-mapped_file) +target_link_libraries(cpp-deprecated-mapped_file PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-deprecated-mapped_file PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/mapped_file/mapped_file.cpp +) diff --git a/library/cpp/deprecated/mapped_file/CMakeLists.txt b/library/cpp/deprecated/mapped_file/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/deprecated/mapped_file/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/deprecated/mapped_file/CMakeLists.windows-x86_64.txt b/library/cpp/deprecated/mapped_file/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..a00407491d --- /dev/null +++ b/library/cpp/deprecated/mapped_file/CMakeLists.windows-x86_64.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-deprecated-mapped_file) +target_link_libraries(cpp-deprecated-mapped_file PUBLIC + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-deprecated-mapped_file PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/deprecated/mapped_file/mapped_file.cpp +) diff --git a/library/cpp/deprecated/mapped_file/mapped_file.cpp b/library/cpp/deprecated/mapped_file/mapped_file.cpp new file mode 100644 index 0000000000..b0e4511299 --- /dev/null +++ b/library/cpp/deprecated/mapped_file/mapped_file.cpp @@ -0,0 +1,64 @@ +#include "mapped_file.h" + +#include <util/generic/yexception.h> +#include <util/system/defaults.h> +#include <util/system/hi_lo.h> +#include <util/system/filemap.h> + +TMappedFile::TMappedFile(TFileMap* map, const char* dbgName) { + Map_ = map; + i64 len = Map_->Length(); + if (Hi32(len) != 0 && sizeof(size_t) <= sizeof(ui32)) + ythrow yexception() << "File '" << dbgName << "' mapping error: " << len << " too large"; + + Map_->Map(0, static_cast<size_t>(len)); +} + +TMappedFile::TMappedFile(const TFile& file, TFileMap::EOpenMode om, const char* dbgName) + : Map_(nullptr) +{ + init(file, om, dbgName); +} + +void TMappedFile::precharge(size_t off, size_t size) const { + if (!Map_) + return; + + Map_->Precharge(off, size); +} + +void TMappedFile::init(const TString& name) { + THolder<TFileMap> map(new TFileMap(name)); + TMappedFile newFile(map.Get(), name.data()); + Y_UNUSED(map.Release()); + newFile.swap(*this); + newFile.term(); +} + +void TMappedFile::init(const TString& name, size_t length, TFileMap::EOpenMode om) { + THolder<TFileMap> map(new TFileMap(name, length, om)); + TMappedFile newFile(map.Get(), name.data()); + Y_UNUSED(map.Release()); + newFile.swap(*this); + newFile.term(); +} + +void TMappedFile::init(const TFile& file, TFileMap::EOpenMode om, const char* dbgName) { + THolder<TFileMap> map(new TFileMap(file, om)); + TMappedFile newFile(map.Get(), dbgName); + Y_UNUSED(map.Release()); + newFile.swap(*this); + newFile.term(); +} + +void 
TMappedFile::init(const TString& name, TFileMap::EOpenMode om) { + THolder<TFileMap> map(new TFileMap(name, om)); + TMappedFile newFile(map.Get(), name.data()); + Y_UNUSED(map.Release()); + newFile.swap(*this); + newFile.term(); +} + +void TMappedFile::flush() { + Map_->Flush(); +} diff --git a/library/cpp/deprecated/mapped_file/ya.make b/library/cpp/deprecated/mapped_file/ya.make new file mode 100644 index 0000000000..309341f1da --- /dev/null +++ b/library/cpp/deprecated/mapped_file/ya.make @@ -0,0 +1,7 @@ +LIBRARY() + +SRCS( + mapped_file.cpp +) + +END() diff --git a/library/cpp/geo/CMakeLists.darwin-x86_64.txt b/library/cpp/geo/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..87e48b4a71 --- /dev/null +++ b/library/cpp/geo/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,24 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(library-cpp-geo) +target_link_libraries(library-cpp-geo PUBLIC + contrib-libs-cxxsupp + yutil +) +target_sources(library-cpp-geo PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/geo/bbox.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/geo.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/point.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/polygon.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/load_save_helper.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/size.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/util.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/window.cpp +) diff --git a/library/cpp/geo/CMakeLists.linux-aarch64.txt b/library/cpp/geo/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..cdad35989a --- /dev/null +++ b/library/cpp/geo/CMakeLists.linux-aarch64.txt @@ -0,0 +1,25 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(library-cpp-geo) +target_link_libraries(library-cpp-geo PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil +) +target_sources(library-cpp-geo PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/geo/bbox.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/geo.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/point.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/polygon.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/load_save_helper.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/size.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/util.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/window.cpp +) diff --git a/library/cpp/geo/CMakeLists.linux-x86_64.txt b/library/cpp/geo/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..cdad35989a --- /dev/null +++ b/library/cpp/geo/CMakeLists.linux-x86_64.txt @@ -0,0 +1,25 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(library-cpp-geo) +target_link_libraries(library-cpp-geo PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil +) +target_sources(library-cpp-geo PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/geo/bbox.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/geo.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/point.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/polygon.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/load_save_helper.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/size.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/util.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/window.cpp +) diff --git a/library/cpp/geo/CMakeLists.txt b/library/cpp/geo/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/geo/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/geo/CMakeLists.windows-x86_64.txt b/library/cpp/geo/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..87e48b4a71 --- /dev/null +++ b/library/cpp/geo/CMakeLists.windows-x86_64.txt @@ -0,0 +1,24 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(library-cpp-geo) +target_link_libraries(library-cpp-geo PUBLIC + contrib-libs-cxxsupp + yutil +) +target_sources(library-cpp-geo PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/geo/bbox.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/geo.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/point.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/polygon.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/load_save_helper.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/size.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/util.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/geo/window.cpp +) diff --git a/library/cpp/geo/bbox.cpp b/library/cpp/geo/bbox.cpp new file mode 100644 index 0000000000..aa4258ac22 --- /dev/null +++ b/library/cpp/geo/bbox.cpp @@ -0,0 +1 @@ +#include "bbox.h" diff --git a/library/cpp/geo/bbox.h b/library/cpp/geo/bbox.h new file mode 100644 index 0000000000..7ec7e6f7d6 --- /dev/null +++ b/library/cpp/geo/bbox.h @@ -0,0 +1,59 @@ +#pragma once + +#include <util/generic/utility.h> + +#include "point.h" + +namespace NGeo { + + class TGeoBoundingBox { + public: + TGeoBoundingBox() + + = default; + + TGeoBoundingBox(const TGeoPoint& p1, const TGeoPoint& p2) { + MinX_ = Min(p1.Lon(), p2.Lon()); + MaxX_ = Max(p1.Lon(), p2.Lon()); + MinY_ = Min(p1.Lat(), p2.Lat()); + MaxY_ = Max(p1.Lat(), p2.Lat()); + } + + const double& GetMinX() const { + return MinX_; + } + + const double& GetMaxX() const { + return MaxX_; + } + + const double& GetMinY() const { + return MinY_; + } + + const double& GetMaxY() const { + return MaxY_; + } + + double Width() const { + return MaxX_ - MinX_; + } + + double Height() const { + return MaxY_ - MinY_; + } + + private: + double MinX_{std::numeric_limits<double>::quiet_NaN()}; + double MaxX_{std::numeric_limits<double>::quiet_NaN()}; + double MinY_{std::numeric_limits<double>::quiet_NaN()}; + double MaxY_{std::numeric_limits<double>::quiet_NaN()}; + }; + + inline bool operator==(const TGeoBoundingBox& a, const TGeoBoundingBox& b) { + return a.GetMinX() == b.GetMinX() && + 
a.GetMinY() == b.GetMinY() && + a.GetMaxX() == b.GetMaxX() && + a.GetMaxY() == b.GetMaxY(); + } +} // namespace NGeo diff --git a/library/cpp/geo/geo.cpp b/library/cpp/geo/geo.cpp new file mode 100644 index 0000000000..37adc5c62c --- /dev/null +++ b/library/cpp/geo/geo.cpp @@ -0,0 +1 @@ +#include "geo.h" diff --git a/library/cpp/geo/geo.h b/library/cpp/geo/geo.h new file mode 100644 index 0000000000..1aebacab5c --- /dev/null +++ b/library/cpp/geo/geo.h @@ -0,0 +1,8 @@ +#pragma once + +#include "bbox.h" +#include "point.h" +#include "polygon.h" +#include "size.h" +#include "util.h" +#include "window.h" diff --git a/library/cpp/geo/load_save_helper.cpp b/library/cpp/geo/load_save_helper.cpp new file mode 100644 index 0000000000..13fa7ac6df --- /dev/null +++ b/library/cpp/geo/load_save_helper.cpp @@ -0,0 +1,49 @@ +#include "load_save_helper.h" +#include <util/stream/input.h> + +void TSerializer<NGeo::TGeoPoint>::Save(IOutputStream* out, const NGeo::TGeoPoint& point) { + double lon = static_cast<double>(point.Lon()); + double lat = static_cast<double>(point.Lat()); + ::Save(out, lon); + ::Save(out, lat); +} + +void TSerializer<NGeo::TGeoPoint>::Load(IInputStream* in, NGeo::TGeoPoint& point) { + double lon = std::numeric_limits<double>::quiet_NaN(); + double lat = std::numeric_limits<double>::quiet_NaN(); + ::Load(in, lon); + ::Load(in, lat); + point = {lon, lat}; +} + +void TSerializer<NGeo::TGeoWindow>::Save(IOutputStream* out, const NGeo::TGeoWindow& window) { + const auto& center = window.GetCenter(); + const auto& size = window.GetSize(); + ::Save(out, center); + ::Save(out, size); +} + +void TSerializer<NGeo::TGeoWindow>::Load(IInputStream* in, NGeo::TGeoWindow& window) { + NGeo::TSize size{}; + NGeo::TGeoPoint center{}; + + ::Load(in, center); + ::Load(in, size); + + window = {center, size}; +} + +void TSerializer<NGeo::TSize>::Save(IOutputStream* out, const NGeo::TSize& size) { + double width = static_cast<double>(size.GetWidth()); + double height = 
static_cast<double>(size.GetHeight()); + ::Save(out, width); + ::Save(out, height); +} + +void TSerializer<NGeo::TSize>::Load(IInputStream* in, NGeo::TSize& size) { + double width = std::numeric_limits<double>::quiet_NaN(); + double height = std::numeric_limits<double>::quiet_NaN(); + ::Load(in, width); + ::Load(in, height); + size = {width, height}; +} diff --git a/library/cpp/geo/load_save_helper.h b/library/cpp/geo/load_save_helper.h new file mode 100644 index 0000000000..4a5fceea18 --- /dev/null +++ b/library/cpp/geo/load_save_helper.h @@ -0,0 +1,23 @@ +#pragma once + +#include <library/cpp/geo/window.h> +#include <util/stream/input.h> +#include <util/ysaveload.h> + +template <> +struct TSerializer<NGeo::TGeoPoint> { + static void Save(IOutputStream*, const NGeo::TGeoPoint&); + static void Load(IInputStream*, NGeo::TGeoPoint&); +}; + +template <> +struct TSerializer<NGeo::TGeoWindow> { + static void Save(IOutputStream*, const NGeo::TGeoWindow&); + static void Load(IInputStream*, NGeo::TGeoWindow&); +}; + +template <> +struct TSerializer<NGeo::TSize> { + static void Save(IOutputStream*, const NGeo::TSize&); + static void Load(IInputStream*, NGeo::TSize&); +}; diff --git a/library/cpp/geo/point.cpp b/library/cpp/geo/point.cpp new file mode 100644 index 0000000000..1d227c967f --- /dev/null +++ b/library/cpp/geo/point.cpp @@ -0,0 +1,146 @@ +#include "point.h" +#include "util.h" + +#include <util/generic/ylimits.h> +#include <util/generic/ymath.h> + +#include <cstdlib> +#include <utility> + +namespace NGeo { + namespace { + bool IsNonDegeneratePoint(double lon, double lat) { + return (MIN_LONGITUDE - WORLD_WIDTH < lon && lon < MAX_LONGITUDE + WORLD_WIDTH) && + (MIN_LATITUDE < lat && lat < MAX_LATITUDE); + } + } // namespace + + float TGeoPoint::Distance(const TGeoPoint& p) const noexcept { + auto dp = p - (*this); + return sqrtf(Sqr(GetWidthAtEquator(dp.GetWidth(), (Lat_ + p.Lat()) * 0.5)) + Sqr(dp.GetHeight())); + } + + bool TGeoPoint::IsPole() const noexcept { + 
return Lat_ <= MIN_LATITUDE || MAX_LATITUDE <= Lat_; + } + + bool TGeoPoint::IsVisibleOnMap() const noexcept { + return -VISIBLE_LATITUDE_BOUND <= Lat_ && Lat_ <= VISIBLE_LATITUDE_BOUND; + } + + TGeoPoint TGeoPoint::Parse(TStringBuf s, TStringBuf delimiter) { + const auto& [lon, lat] = PairFromString(s, delimiter); + Y_ENSURE_EX(IsNonDegeneratePoint(lon, lat), TBadCastException() << "Invalid point: (" << lon << ", " << lat << ")"); + return {lon, lat}; + } + + TMaybe<TGeoPoint> TGeoPoint::TryParse(TStringBuf s, TStringBuf delimiter) { + std::pair<double, double> lonLat; + if (!TryPairFromString(lonLat, s, delimiter)) { + return {}; + } + if (!IsNonDegeneratePoint(lonLat.first, lonLat.second)) { + return {}; + } + return TGeoPoint(lonLat.first, lonLat.second); + } + + TSize operator-(const TGeoPoint& p1, const TGeoPoint& p2) { + return {p1.Lon() - p2.Lon(), p1.Lat() - p2.Lat()}; + } + + /* + Conversion code was imported from http://wiki.yandex-team.ru/YandexMobile/maps/Algorithm/mapengine/coordtransforms + */ + namespace WGS84 { + /* Isometric to geodetic latitude parameters, default to WGS 84 */ + const double ab = 0.00335655146887969400; + const double bb = 0.00000657187271079536; + const double cb = 0.00000001764564338702; + const double db = 0.00000000005328478445; + + const double _a = R; + const double _f = 1.0 / 298.257223563; + const double _b = _a - _f * _a; + const double _e = sqrt(1 - pow(_b / _a, 2)); + const double _e2 = _e * _e; + const double _g = sqrt(1.0 - _e2); + const double _gR2 = _g * R * 2.0; + } // namespace WGS84 + + TGeoPoint MercatorToLL(TMercatorPoint pt) { + using namespace WGS84; + + // Y_ENSURE(pt.IsDefined(), "Point is not defined"); + + /* Isometric latitude*/ + const double xphi = PI / 2.0 - 2.0 * atan(exp(-pt.Y_ / R)); + + double latitude = xphi + ab * sin(2.0 * xphi) + bb * sin(4.0 * xphi) + cb * sin(6.0 * xphi) + db * sin(8.0 * xphi); + double longitude = pt.X_ / R; + + return TGeoPoint{Rad2deg(longitude), Rad2deg(latitude)}; + } 
+ + double GetMercatorY(const TGeoPoint& ll) { + if (Y_UNLIKELY(ll.Lat() == 0.)) { + // shortcut for common case, avoiding floating point errors + return 0.; + } + if (Y_UNLIKELY(ll.Lat() == MIN_LATITUDE)) { + return -std::numeric_limits<double>::infinity(); + } + if (Y_UNLIKELY(ll.Lat() == MAX_LATITUDE)) { + return +std::numeric_limits<double>::infinity(); + } + double lat = Deg2rad(ll.Lat()); + double esinLat = WGS84::_e * sin(lat); + + double tan_temp = tan(PI / 4.e0 + lat / 2.e0); + double pow_temp = pow(tan(PI / 4.e0 + asin(esinLat) / 2), WGS84::_e); + double U = tan_temp / pow_temp; + return WGS84::R * log(U); + } + + TMercatorPoint LLToMercator(TGeoPoint ll) { + // Y_ENSURE(ll.IsValid(), "Point is not defined"); + + // Y_ENSURE(-90. <= ll.Lat() && ll.Lat() <= +90., "Latitude is out of range [-90, 90]"); + + double lon = Deg2rad(ll.Lon()); + double x = WGS84::R * lon; + double y = GetMercatorY(ll); + + return TMercatorPoint{x, y}; + } + + double GeodeticDistance(TGeoPoint p1, TGeoPoint p2) { + using namespace WGS84; + + constexpr double deg2HalfRad = PI / 360.0; + + const double lon1Half = p1.Lon() * deg2HalfRad; + const double lon2Half = p2.Lon() * deg2HalfRad; + + const double lat1Half = p1.Lat() * deg2HalfRad; + const double lat2Half = p2.Lat() * deg2HalfRad; + + const double diffLatHalf = fabs(lat1Half - lat2Half); + const double diffLonHalf = fabs(lon1Half - lon2Half); + + if (diffLatHalf < 0.5e-8 && diffLonHalf < 0.5e-8) { + return 0; + } + + double s = sin(lat1Half + lat2Half); + double s2 = s * s; + double m = _gR2 / (1.0 - _e2 * s2); + + const double w = sin(diffLatHalf); + const double w2 = w * w; + const double cc = Max(1.0 - s2 - w2, 0.0); // cos(lat1Half * 2) * cos(lat2Half * 2) + const double z = sin(diffLonHalf); + + return m * asin(sqrt(w2 + cc * z * z)); + } +} // namespace NGeo diff --git a/library/cpp/geo/point.h b/library/cpp/geo/point.h new file mode 100644 index 0000000000..70c91ab2dd --- /dev/null +++ b/library/cpp/geo/point.h @@ -0,0 
+1,198 @@ +#pragma once + +#include <util/generic/string.h> +#include <util/stream/output.h> +#include <util/string/cast.h> +#include <util/generic/maybe.h> + +#include <algorithm> +#include <cmath> + +namespace NGeo { + class TSize; + + class TGeoPoint { + public: + TGeoPoint(double lon, double lat) noexcept + : Lon_(lon) + , Lat_(lat) + { + } + + TGeoPoint() noexcept + : Lon_(BadX) + , Lat_(BadY) + { + } + + double Lon() const noexcept { + return Lon_; + } + + double Lat() const noexcept { + return Lat_; + } + + float Distance(const TGeoPoint& p) const noexcept; + + void swap(TGeoPoint& p) noexcept { + std::swap(Lon_, p.Lon_); + std::swap(Lat_, p.Lat_); + } + + bool IsValid() const { + return (Lon_ != BadX) && (Lat_ != BadY); + } + + /// Returns true if the point represents either North or South Pole + bool IsPole() const noexcept; + + /// Returns true if the point may be shown on the Yandex Map (fits into the valid range of latitudes) + bool IsVisibleOnMap() const noexcept; + + bool operator!() const { + return !IsValid(); + } + + TString ToCgiStr() const { + return ToString(); + } + + TString ToString(const char* delimiter = ",") const { + return TString::Join(::ToString(Lon_), delimiter, ::ToString(Lat_)); + } + + /** + * \note Parsing functions work is safe way. They discard invalid points: + * 1) on the Poles and 'beyond' the Poles; + * 2) not belonging to the 'main' world and +/-1 world to the left or to the right. + * If you need such cases, construct the TGeoPoint manually. 
+ */ + + /// Throws TBadCastException on error + static TGeoPoint Parse(TStringBuf s, TStringBuf delimiter = TStringBuf(",")); + + /// Returns Nothing() on error + static TMaybe<TGeoPoint> TryParse(TStringBuf s, TStringBuf delimiter = TStringBuf(",")); + + private: + double Lon_; + double Lat_; + + static constexpr double BadX{361.}; + static constexpr double BadY{181.}; + }; + + double GeodeticDistance(TGeoPoint p1, TGeoPoint p2); + + /** + * \class TMercatorPoint + * + * Represents a point in EPSG:3395 projection + * (WGS 84 / World Mercator) + */ + class TMercatorPoint { + public: + friend class TMercatorWindow; + friend TGeoPoint MercatorToLL(TMercatorPoint); + + /** + * Constructs a point with the given coordinates. + */ + constexpr TMercatorPoint(double x, double y) noexcept + : X_{x} + , Y_{y} + { + } + + /** + * Constructs a point with two NaN coordinates. + * + * Should not be called directly. + * If your `point` variable might be undefined, + * declare it explicitly as TMaybe<TMercatorPoint>. + */ + constexpr TMercatorPoint() noexcept + : X_{std::numeric_limits<double>::quiet_NaN()} + , Y_{std::numeric_limits<double>::quiet_NaN()} + { + } + + /** + * Returns the X_ coordinate. + * + * The line X_ == 0 corresponds to the Prime meridian. + */ + constexpr double X() const noexcept { + return X_; + } + + /** + * Returns the Y_ coordinate. + * + * The line Y_ == 0 corresponds to the Equator. 
+ */ + constexpr double Y() const noexcept { + return Y_; + } + + private: + bool IsDefined() const noexcept { + return !std::isnan(X_) && !std::isnan(Y_); + } + + private: + double X_; + double Y_; + }; + + /** + * Operators + */ + + inline bool operator==(const TGeoPoint& p1, const TGeoPoint& p2) { + return p1.Lon() == p2.Lon() && p1.Lat() == p2.Lat(); + } + + inline bool operator==(const TMercatorPoint& p1, const TMercatorPoint& p2) { + return p1.X() == p2.X() && p1.Y() == p2.Y(); + } + + inline bool operator<(const TGeoPoint& p1, const TGeoPoint& p2) { + if (p1.Lon() != p2.Lon()) { + return p1.Lon() < p2.Lon(); + } + return p1.Lat() < p2.Lat(); + } + + /** + * Conversion + */ + + namespace WGS84 { + /* Radius of reference ellipsoid, default to WGS 84 */ + const double R = 6378137.0; + } // namespace WGS84 + + using TPointLL = TGeoPoint; + using TPointXY = TMercatorPoint; + + TGeoPoint MercatorToLL(TMercatorPoint); + TMercatorPoint LLToMercator(TGeoPoint); + + /** + * Input/output + */ + + TSize operator-(const TGeoPoint& p1, const TGeoPoint& p2); +} // namespace NGeo + +template <> +inline void Out<NGeo::TGeoPoint>(IOutputStream& o, const NGeo::TGeoPoint& p) { + o << '[' << p.Lon() << ", " << p.Lat() << ']'; +} + +template <> +inline void Out<NGeo::TMercatorPoint>(IOutputStream& o, const NGeo::TMercatorPoint& p) { + o << '[' << p.X() << ", " << p.Y() << ']'; +} diff --git a/library/cpp/geo/polygon.cpp b/library/cpp/geo/polygon.cpp new file mode 100644 index 0000000000..44e5c38b5f --- /dev/null +++ b/library/cpp/geo/polygon.cpp @@ -0,0 +1,28 @@ +#include "polygon.h" +namespace NGeo { + TMaybe<TGeoPolygon> TGeoPolygon::TryParse(TStringBuf s, TStringBuf llDelimiter, TStringBuf pointsDelimiter) { + TVector<TGeoPoint> points; + + for (const auto& pointString : StringSplitter(s).SplitByString(pointsDelimiter).SkipEmpty()) { + auto curPoint = TGeoPoint::TryParse(pointString.Token(), llDelimiter); + if (!curPoint) { + return {}; + } + points.push_back(*curPoint); + } + 
+ if (points.size() < 3) { + return {}; + } + + return TGeoPolygon(points); + } + + TGeoPolygon TGeoPolygon::Parse(TStringBuf s, TStringBuf llDelimiter, TStringBuf pointsDelimiter) { + auto res = TGeoPolygon::TryParse(s, llDelimiter, pointsDelimiter); + if (!res) { + ythrow yexception() << "Can't parse polygon from input string: " << s; + } + return *res; + } +} // namespace NGeo diff --git a/library/cpp/geo/polygon.h b/library/cpp/geo/polygon.h new file mode 100644 index 0000000000..1528345fec --- /dev/null +++ b/library/cpp/geo/polygon.h @@ -0,0 +1,90 @@ +#pragma once + +#include "point.h" +#include "window.h" + +#include <util/ysaveload.h> +#include <util/generic/algorithm.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/generic/yexception.h> +#include <util/stream/output.h> +#include <util/string/cast.h> +#include <util/string/join.h> +#include <util/string/split.h> + +#include <algorithm> +#include <functional> + +namespace NGeo { + class TGeoPolygon { + private: + TVector<TGeoPoint> Points_; + TGeoWindow Window_; + + public: + TGeoPolygon() = default; + + explicit TGeoPolygon(const TVector<TGeoPoint>& points) + : Points_(points) + { + CalcWindow(); + } + + const TVector<TGeoPoint>& GetPoints() const { + return Points_; + } + + const TGeoWindow& GetWindow() const { + return Window_; + } + + void swap(TGeoPolygon& o) noexcept { + Points_.swap(o.Points_); + Window_.swap(o.Window_); + } + + bool IsValid() const noexcept { + return !Points_.empty() && Window_.IsValid(); + } + + bool operator!() const { + return !IsValid(); + } + + /** + * try to parse TGeoPolygon from string which stores points + * coords are separated by llDelimiter, points are separated by pointsDelimiter + * return parsed TGeoPolygon on success, otherwise throw exception + */ + static TGeoPolygon Parse(TStringBuf s, TStringBuf llDelimiter = ",", TStringBuf pointsDelimiter = TStringBuf(" ")); + + /** + * try to parse TGeoPolygon from string which stores 
points + * coords are separated by llDelimiter, points are separated by pointsDelimiter + * return TMaybe of parsed TGeoPolygon on success, otherwise return empty TMaybe + */ + static TMaybe<TGeoPolygon> TryParse(TStringBuf s, TStringBuf llDelimiter = ",", TStringBuf pointsDelimiter = TStringBuf(" ")); + + private: + void CalcWindow() { + auto getLon = std::mem_fn(&TGeoPoint::Lon); + double lowerX = MinElementBy(Points_.begin(), Points_.end(), getLon)->Lon(); + double upperX = MaxElementBy(Points_.begin(), Points_.end(), getLon)->Lon(); + + auto getLat = std::mem_fn(&TGeoPoint::Lat); + double lowerY = MinElementBy(Points_.begin(), Points_.end(), getLat)->Lat(); + double upperY = MaxElementBy(Points_.begin(), Points_.end(), getLat)->Lat(); + + Window_ = TGeoWindow{TGeoPoint{lowerX, lowerY}, TGeoPoint{upperX, upperY}}; + } + }; + + inline bool operator==(const TGeoPolygon& p1, const TGeoPolygon& p2) { + return p1.GetPoints() == p2.GetPoints(); + } + + inline bool operator!=(const TGeoPolygon& p1, const TGeoPolygon& p2) { + return !(p1 == p2); + } +} // namespace NGeo diff --git a/library/cpp/geo/size.cpp b/library/cpp/geo/size.cpp new file mode 100644 index 0000000000..f1bd8ab763 --- /dev/null +++ b/library/cpp/geo/size.cpp @@ -0,0 +1,31 @@ +#include "size.h" + +#include "util.h" + +namespace NGeo { + const double TSize::BadWidth = -1.; + const double TSize::BadHeight = -1.; + + namespace { + bool IsNonNegativeSize(double width, double height) { + return width >= 0. 
&& height >= 0.; + } + } // namespace + + TSize TSize::Parse(TStringBuf s, TStringBuf delimiter) { + const auto& [width, height] = PairFromString(s, delimiter); + Y_ENSURE_EX(IsNonNegativeSize(width, height), TBadCastException() << "Negative window size"); + return {width, height}; + } + + TMaybe<TSize> TSize::TryParse(TStringBuf s, TStringBuf delimiter) { + std::pair<double, double> lonLat; + if (!TryPairFromString(lonLat, s, delimiter)) { + return {}; + } + if (!IsNonNegativeSize(lonLat.first, lonLat.second)) { + return {}; + } + return TSize{lonLat.first, lonLat.second}; + } +} // namespace NGeo diff --git a/library/cpp/geo/size.h b/library/cpp/geo/size.h new file mode 100644 index 0000000000..b619c6d899 --- /dev/null +++ b/library/cpp/geo/size.h @@ -0,0 +1,93 @@ +#pragma once + +#include <util/generic/string.h> +#include <util/stream/output.h> +#include <util/string/cast.h> + +namespace NGeo { + class TSize { + public: + TSize(double width, double height) noexcept + : Width_(width) + , Height_(height) + { + } + + explicit TSize(double size) noexcept + : Width_(size) + , Height_(size) + { + } + + TSize() noexcept + : Width_(BadWidth) + , Height_(BadHeight) + { + } + + double GetWidth() const noexcept { + return Width_; + } + + double GetHeight() const noexcept { + return Height_; + } + + void swap(TSize& s) noexcept { + std::swap(Width_, s.Width_); + std::swap(Height_, s.Height_); + } + + bool IsValid() const { + return (Width_ != BadWidth) && (Height_ != BadHeight); + } + + void Stretch(double multiplier) { + Width_ *= multiplier; + Height_ *= multiplier; + } + + void Inflate(double additionX, double additionY) { + Width_ += additionX; + Height_ += additionY; + } + + bool operator!() const { + return !IsValid(); + } + + TString ToCgiStr() const { + TString s = ToString(Width_); + s.append(','); + s.append(ToString(Height_)); + return s; + } + + /** + * try to parse TSize + * return parsed TSize on success, otherwise throw exception + */ + static TSize 
Parse(TStringBuf s, TStringBuf delimiter = TStringBuf(",")); + + /** + * try to parse TSize + * return TMaybe of parsed TSize on success, otherwise return empty TMaybe + */ + static TMaybe<TSize> TryParse(TStringBuf s, TStringBuf delimiter = TStringBuf(",")); + + private: + double Width_; + double Height_; + static const double BadWidth; + static const double BadHeight; + }; + + inline bool operator==(const TSize& p1, const TSize& p2) { + return p1.GetHeight() == p2.GetHeight() && p1.GetWidth() == p2.GetWidth(); + } +} // namespace NGeo + +template <> +inline void Out<NGeo::TSize>(IOutputStream& o, const NGeo::TSize& s) { + o << '<' << s.GetWidth() << ", " << s.GetHeight() << '>'; +} diff --git a/library/cpp/geo/style/ya.make b/library/cpp/geo/style/ya.make new file mode 100644 index 0000000000..f72d50f27e --- /dev/null +++ b/library/cpp/geo/style/ya.make @@ -0,0 +1,8 @@ +CPP_STYLE_TEST_14() + +STYLE( + library/cpp/geo/**/*.cpp + library/cpp/geo/**/*.h +) + +END() diff --git a/library/cpp/geo/ut/load_save_helper_ut.cpp b/library/cpp/geo/ut/load_save_helper_ut.cpp new file mode 100644 index 0000000000..f251f56630 --- /dev/null +++ b/library/cpp/geo/ut/load_save_helper_ut.cpp @@ -0,0 +1,90 @@ +#include "load_save_helper.h" +#include "point.h" + +#include <library/cpp/testing/unittest/registar.h> +#include <util/stream/str.h> +#include <util/ysaveload.h> + +namespace { + void CheckSave(const NGeo::TGeoPoint& point) { + TStringStream output; + ::Save(&output, point); + TStringStream answer; + ::Save(&answer, static_cast<double>(point.Lon())); + ::Save(&answer, static_cast<double>(point.Lat())); + UNIT_ASSERT_EQUAL(output.Str(), answer.Str()); + } + + void CheckLoad(const double x, const double y) { + TStringStream input; + ::Save(&input, x); + ::Save(&input, y); + NGeo::TGeoPoint output; + ::Load(&input, output); + + const double eps = 1.E-8; + UNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(output.Lon()), x, eps); + 
UNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(output.Lat()), y, eps); + } + + void CheckLoadAfterSavePointLL(double x, double y) { + NGeo::TGeoPoint answer = {x, y}; + TStringStream iostream; + ::Save(&iostream, answer); + NGeo::TGeoPoint output; + ::Load(&iostream, output); + + const double eps = 1.E-8; + UNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(output.Lon()), x, eps); + UNIT_ASSERT_DOUBLES_EQUAL(static_cast<double>(output.Lat()), y, eps); + } + + void CheckLoadAfterSaveWindowLL(NGeo::TGeoPoint center, NGeo::TSize size) { + NGeo::TGeoWindow answer = {center, size}; + TStringStream iostream; + ::Save(&iostream, answer); + NGeo::TGeoWindow output; + ::Load(&iostream, output); + UNIT_ASSERT_EQUAL(output.GetCenter(), answer.GetCenter()); + UNIT_ASSERT_EQUAL(output.GetSize(), answer.GetSize()); + } +} // namespace + +Y_UNIT_TEST_SUITE(TSaveLoadForPointLL) { + Y_UNIT_TEST(TestSave) { + // {27.561481, 53.902496} Minsk Lon and Lat + CheckSave({27.561481, 53.902496}); + CheckSave({-27.561481, 53.902496}); + CheckSave({27.561481, -53.902496}); + CheckSave({-27.561481, -53.902496}); + } + + Y_UNIT_TEST(TestLoad) { + CheckLoad(27.561481, 53.902496); + CheckLoad(-27.561481, 53.902496); + CheckLoad(27.561481, -53.902496); + CheckLoad(-27.561481, -53.902496); + } + + Y_UNIT_TEST(TestSaveLoad) { + CheckLoadAfterSavePointLL(27.561481, 53.902496); + CheckLoadAfterSavePointLL(-27.561481, 53.902496); + CheckLoadAfterSavePointLL(27.561481, -53.902496); + CheckLoadAfterSavePointLL(-27.561481, -53.902496); + CheckLoadAfterSavePointLL(0, 0); + } +} + +Y_UNIT_TEST_SUITE(TSaveLoadForWindowLL) { + Y_UNIT_TEST(TestSave) { + CheckLoadAfterSaveWindowLL({27.561481, 53.902496}, {1, 2}); + CheckLoadAfterSaveWindowLL({27.561481, 53.902496}, {2, 1}); + CheckLoadAfterSaveWindowLL({-27.561481, 53.902496}, {1, 2}); + CheckLoadAfterSaveWindowLL({-27.561481, 53.902496}, {2, 1}); + CheckLoadAfterSaveWindowLL({27.561481, -53.902496}, {1, 2}); + CheckLoadAfterSaveWindowLL({27.561481, -53.902496}, {2, 
1}); + CheckLoadAfterSaveWindowLL({-27.561481, -53.902496}, {1, 2}); + CheckLoadAfterSaveWindowLL({-27.561481, -53.902496}, {2, 1}); + CheckLoadAfterSaveWindowLL({0, 0}, {0, 0}); + } +} diff --git a/library/cpp/geo/ut/point_ut.cpp b/library/cpp/geo/ut/point_ut.cpp new file mode 100644 index 0000000000..bbf8f32cea --- /dev/null +++ b/library/cpp/geo/ut/point_ut.cpp @@ -0,0 +1,171 @@ +#include "point.h" + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NGeo; + +namespace { + void CheckMercator(TGeoPoint input, TMercatorPoint answer, double eps = 1.e-8) { + auto output = LLToMercator(input); + UNIT_ASSERT_DOUBLES_EQUAL(output.X(), answer.X(), eps); + UNIT_ASSERT_DOUBLES_EQUAL(output.Y(), answer.Y(), eps); + } + + void CheckGeo(TMercatorPoint input, TGeoPoint answer, double eps = 1.e-8) { + auto output = MercatorToLL(input); + UNIT_ASSERT_DOUBLES_EQUAL(output.Lon(), answer.Lon(), eps); + UNIT_ASSERT_DOUBLES_EQUAL(output.Lat(), answer.Lat(), eps); + } +} // namespace + +Y_UNIT_TEST_SUITE(TPointTest) { + Y_UNIT_TEST(TestGeoPointFromString) { + UNIT_ASSERT_EQUAL(TGeoPoint::Parse("0.15,0.67"), + TGeoPoint(0.15, 0.67)); + UNIT_ASSERT_EQUAL(TGeoPoint::Parse("-52.,-27."), + TGeoPoint(-52., -27.)); + UNIT_ASSERT_EQUAL(TGeoPoint::Parse("0.15 0.67", " "), + TGeoPoint(0.15, 0.67)); + UNIT_ASSERT_EQUAL(TGeoPoint::Parse("-27. 
-52", " "), + TGeoPoint(-27., -52.)); + UNIT_ASSERT_EQUAL(TGeoPoint::Parse("182,55"), + TGeoPoint(182., 55.)); + + // current behavior + UNIT_ASSERT(TGeoPoint::TryParse(TString{}).Empty()); + UNIT_ASSERT_EXCEPTION(TGeoPoint::Parse("Hello,world"), TBadCastException); + UNIT_ASSERT_EXCEPTION(TGeoPoint::Parse("640 17", " "), TBadCastException); + UNIT_ASSERT_EXCEPTION(TGeoPoint::Parse("50.,100"), TBadCastException); + UNIT_ASSERT_EQUAL(TGeoPoint::Parse(" 0.01, 0.01"), TGeoPoint(0.01, 0.01)); + UNIT_ASSERT_EXCEPTION(TGeoPoint::Parse("0.01 , 0.01"), TBadCastException); + UNIT_ASSERT_EXCEPTION(TGeoPoint::Parse("0.01, 0.01 "), TBadCastException); + } +} + +Y_UNIT_TEST_SUITE(TConversionTest) { + Y_UNIT_TEST(TestConversionGeoToMercator) { + // test data is obtained using PostGIS: + // SELECT ST_AsText(ST_Transform(ST_SetSRID(ST_MakePoint(lon, lat), 4326), 3395)) + + CheckMercator({27.547028, 53.893962}, {3066521.12982805, 7115552.47353991}); + CheckMercator({-70.862782, -53.002613}, {-7888408.80843475, -6949331.55685883}); + CheckMercator({37.588536, 55.734004}, {4184336.68718463, 7470303.90973406}); + CheckMercator({0., 0.}, {0, 0}); + } + + Y_UNIT_TEST(TestConversionMercatorToGeo) { + // test data is obtained using PostGIS: + // SELECT ST_AsText(ST_Transform(ST_SetSRID(ST_MakePoint(X, Y), 3395), 4326)) + + CheckGeo({3066521, 7115552}, {27.5470268337348, 53.8939594873943}); + CheckGeo({-7888409, -6949332}, {-70.8627837208599, -53.0026154014032}); + CheckGeo({4184336, 7470304}, {37.5885298269154, 55.734004457522}); + CheckGeo({0, 0}, {0., 0.}); + } + + Y_UNIT_TEST(TestExactConversion) { + // Zero maps to zero with no epsilons + UNIT_ASSERT_VALUES_EQUAL(LLToMercator({0., 0.}).X(), 0.); + UNIT_ASSERT_VALUES_EQUAL(LLToMercator({0., 0.}).Y(), 0.); + UNIT_ASSERT_VALUES_EQUAL(MercatorToLL({0., 0.}).Lon(), 0.); + UNIT_ASSERT_VALUES_EQUAL(MercatorToLL({0., 0.}).Lat(), 0.); + } + + Y_UNIT_TEST(TestPoles) { + UNIT_ASSERT_VALUES_EQUAL(LLToMercator({0, 90}).Y(), 
std::numeric_limits<double>::infinity()); + UNIT_ASSERT_VALUES_EQUAL(LLToMercator({0, -90}).Y(), -std::numeric_limits<double>::infinity()); + + UNIT_ASSERT_VALUES_EQUAL(MercatorToLL({0, std::numeric_limits<double>::infinity()}).Lat(), 90.); + UNIT_ASSERT_VALUES_EQUAL(MercatorToLL({0, -std::numeric_limits<double>::infinity()}).Lat(), -90.); + } + + Y_UNIT_TEST(TestNearPoles) { + // Reference values were obtained using mpmath library (floating-point arithmetic with arbitrary precision) + CheckMercator({0., 89.9}, {0., 44884542.157175040}, 1.e-6); + CheckMercator({0., 89.99}, {0., 59570746.872518855}, 1.e-5); + CheckMercator({0., 89.999}, {0., 74256950.065173316}, 1.e-4); + CheckMercator({0., 89.9999}, {0., 88943153.242600886}, 1.e-3); + CheckMercator({0., 89.99999}, {0., 103629356.41987618}, 1.e-1); + CheckMercator({0., 89.999999}, {0., 118315559.59714996}, 1.e-1); + CheckMercator({0., 89.9999999}, {0., 133001762.77442373}, 1.e-0); + CheckMercator({0., 89.99999999}, {0., 147687965.95169749}, 1.e+1); + CheckMercator({0., 89.9999999999999857891452847979962825775146484375}, {0., 233563773.75716050}, 1.e+7); + + CheckGeo({0., 233563773.75716050}, {0., 89.9999999999999857891452847979962825775146484375}, 1.e-15); + CheckGeo({0., 147687965.95169749}, {0., 89.99999999}, 1.e-13); + CheckGeo({0., 133001762.77442373}, {0., 89.9999999}, 1.e-13); + CheckGeo({0., 118315559.59714996}, {0., 89.999999}, 1.e-13); + CheckGeo({0., 103629356.41987618}, {0., 89.99999}, 1.e-13); + CheckGeo({0., 88943153.242600886}, {0., 89.9999}, 1.e-13); + CheckGeo({0., 74256950.065173316}, {0., 89.999}, 1.e-13); + CheckGeo({0., 59570746.872518855}, {0., 89.99}, 1.e-13); + CheckGeo({0., 44884542.157175040}, {0., 89.9}, 1.e-13); + } + + Y_UNIT_TEST(TestVisibleRange) { + UNIT_ASSERT(TGeoPoint(37., 55.).IsVisibleOnMap()); + UNIT_ASSERT(!TGeoPoint(37., 86.).IsVisibleOnMap()); + UNIT_ASSERT(TGeoPoint(37., -85.).IsVisibleOnMap()); + UNIT_ASSERT(!TGeoPoint(37., -90.).IsVisibleOnMap()); + } + + 
Y_UNIT_TEST(TestRoundTripGeoMercatorGeo) { + auto check = [](double longitude, double latitude) { + auto pt = MercatorToLL(LLToMercator(TGeoPoint{longitude, latitude})); + UNIT_ASSERT_DOUBLES_EQUAL_C(longitude, pt.Lon(), 1.e-12, "longitude for point (" << longitude << ", " << latitude << ")"); + UNIT_ASSERT_DOUBLES_EQUAL_C(latitude, pt.Lat(), 1.e-8, "latitude for point (" << longitude << ", " << latitude << ")"); + }; + + check(37., 55.); + check(0.1, 0.1); + check(0.2, 89.9); + check(181., -42.); + check(362., -43.); + check(-183., -87.); + check(1000., -77.); + } + + Y_UNIT_TEST(TestRoundTripMercatorGeoMercator) { + auto check = [](double x, double y) { + auto pt = LLToMercator(MercatorToLL(TMercatorPoint{x, y})); + UNIT_ASSERT_DOUBLES_EQUAL_C(x, pt.X(), 1.e-4, "x for point (" << x << ", " << y << ")"); + UNIT_ASSERT_DOUBLES_EQUAL_C(y, pt.Y(), 1.e-4, "y for point (" << x << ", " << y << ")"); + }; + + check(100., 200.); + check(-123456., 654321.); + check(5.e7, 1.23456789); + check(1.e8, -2.e7); + } +} + +Y_UNIT_TEST_SUITE(TestDistance) { + Y_UNIT_TEST(TestGeodeticDistance) { + const TGeoPoint minsk(27.55, 53.916667); + const TGeoPoint moscow(37.617778, 55.755833); + const TGeoPoint newYork(-73.994167, 40.728333); + const TGeoPoint sydney(151.208333, -33.869444); + + const double eps = 1.E-6; // absolute error + + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(minsk, minsk), 0.0, eps); + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(minsk, moscow), 677190.08871321136, eps); + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(minsk, newYork), 7129091.7536358498, eps); + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(minsk, sydney), 15110861.267782301, eps); + + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(moscow, minsk), 677190.08871321136, eps); + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(moscow, moscow), 0.0, eps); + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(moscow, newYork), 7519517.2469277605, eps); + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(moscow, sydney), 
14467193.188083574, eps); + + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(newYork, minsk), 7129091.7536358498, eps); + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(newYork, moscow), 7519517.2469277605, eps); + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(newYork, newYork), 0.0, eps); + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(newYork, sydney), 15954603.669226252, eps); + + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(sydney, minsk), 15110861.267782301, eps); + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(sydney, moscow), 14467193.188083574, eps); + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(sydney, newYork), 15954603.669226252, eps); + UNIT_ASSERT_DOUBLES_EQUAL(GeodeticDistance(sydney, sydney), 0.0, eps); + } +} diff --git a/library/cpp/geo/ut/polygon_ut.cpp b/library/cpp/geo/ut/polygon_ut.cpp new file mode 100644 index 0000000000..cd9dee9759 --- /dev/null +++ b/library/cpp/geo/ut/polygon_ut.cpp @@ -0,0 +1,34 @@ +#include "polygon.h" + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NGeo; + +Y_UNIT_TEST_SUITE(TGeoPolygonTest) { + Y_UNIT_TEST(TestEmptyPolygon) { + TGeoPolygon empty; + UNIT_ASSERT(!empty); + UNIT_ASSERT(!empty.IsValid()); + } + + Y_UNIT_TEST(TestPolygon) { + TGeoPolygon polygon({{1., 2.}, {2., 1.}, {2., 4.}, {1., 3.}}); + UNIT_ASSERT(polygon.IsValid()); + UNIT_ASSERT_EQUAL(polygon.GetWindow(), + TGeoWindow(TGeoPoint(1., 1.), TGeoPoint(2., 4.))); + } + + Y_UNIT_TEST(TestParse) { + UNIT_ASSERT_EQUAL(TGeoPolygon::Parse(TString{"1.23,5.67 7.89,10.11 11.10,9.87"}), + NGeo::TGeoPolygon({{1.23, 5.67}, {7.89, 10.11}, {11.10, 9.87}})); + UNIT_ASSERT_EQUAL(TGeoPolygon::Parse(TString{"1.23,5.67 7.89,10.11 11.10,9.87 6.54,3.21"}), + NGeo::TGeoPolygon({{1.23, 5.67}, {7.89, 10.11}, {11.10, 9.87}, {6.54, 3.21}})); + + UNIT_ASSERT(TGeoPolygon::TryParse(TString{"1.23,5.67 7.89,10.11"}).Empty()); + UNIT_ASSERT_EQUAL(TGeoPolygon::Parse(TString{"1.23+5.67~7.89+10.11~11.10+9.87"}, "+", "~"), + NGeo::TGeoPolygon({{1.23, 5.67}, {7.89, 10.11}, {11.10, 
9.87}})); + + UNIT_ASSERT_EQUAL(TGeoPolygon::Parse(TString{"1.23+5.67+~7.89+10.11+~11.10+9.87"}, "+", "+~"), + NGeo::TGeoPolygon({{1.23, 5.67}, {7.89, 10.11}, {11.10, 9.87}})); + } +} diff --git a/library/cpp/geo/ut/size_ut.cpp b/library/cpp/geo/ut/size_ut.cpp new file mode 100644 index 0000000000..41b4a2c257 --- /dev/null +++ b/library/cpp/geo/ut/size_ut.cpp @@ -0,0 +1,29 @@ +#include "size.h" + +#include <library/cpp/testing/unittest/registar.h> +#include <util/generic/maybe.h> + +using namespace NGeo; + +Y_UNIT_TEST_SUITE(TSizeTest) { + Y_UNIT_TEST(TestFromString) { + UNIT_ASSERT_EQUAL(TSize::Parse("0.15,0.67"), TSize(0.15, 0.67)); + UNIT_ASSERT_EQUAL(TSize::Parse("0.15 0.67", " "), TSize(0.15, 0.67)); + + UNIT_ASSERT_EXCEPTION(TSize::Parse(""), TBadCastException); + UNIT_ASSERT_EXCEPTION(TSize::Parse("Hello,world"), TBadCastException); + UNIT_ASSERT_EXCEPTION(TSize::Parse("-1,-1"), TBadCastException); + + UNIT_ASSERT_EQUAL(TSize::Parse("424242 50", " "), TSize(424242., 50.)); + UNIT_ASSERT_EQUAL(TSize::Parse("50.,424242"), TSize(50., 424242.)); + UNIT_ASSERT_EQUAL(TSize::Parse(" 0.01, 0.01"), TSize(0.01, 0.01)); + UNIT_ASSERT_EXCEPTION(TSize::Parse("0.01 ,0.01"), TBadCastException); + UNIT_ASSERT_EXCEPTION(TSize::Parse("0.01,0.01 "), TBadCastException); + } + + Y_UNIT_TEST(TestTryFromString) { + UNIT_ASSERT(TSize::TryParse("1,2")); + UNIT_ASSERT(!TSize::TryParse("-1,-2")); + UNIT_ASSERT(!TSize::TryParse("1,2a")); + } +} diff --git a/library/cpp/geo/ut/util_ut.cpp b/library/cpp/geo/ut/util_ut.cpp new file mode 100644 index 0000000000..ebd86cfbd8 --- /dev/null +++ b/library/cpp/geo/ut/util_ut.cpp @@ -0,0 +1,36 @@ +#include <library/cpp/geo/util.h> + +#include <library/cpp/testing/unittest/registar.h> + +using namespace NGeo; + +Y_UNIT_TEST_SUITE(TGeoUtilTest) { + Y_UNIT_TEST(TestPointFromString) { + UNIT_ASSERT_EQUAL(PairFromString("27.56,53.90"), (std::pair<double, double>(27.56, 53.90))); + UNIT_ASSERT_EQUAL(PairFromString("27.56 53.90", " "), 
(std::pair<double, double>(27.56, 53.90))); + UNIT_ASSERT_EQUAL(PairFromString("27.56@@53.90", "@@"), (std::pair<double, double>(27.56, 53.90))); + UNIT_ASSERT_EXCEPTION(PairFromString("27.56@@53.90", "@"), TBadCastException); + UNIT_ASSERT_EXCEPTION(PairFromString(""), TBadCastException); + } + + Y_UNIT_TEST(TestTryPointFromString) { + std::pair<double, double> point; + + UNIT_ASSERT(TryPairFromString(point, "27.56,53.90")); + UNIT_ASSERT_EQUAL(point, (std::pair<double, double>(27.56, 53.90))); + + UNIT_ASSERT(TryPairFromString(point, "27.56 53.90", " ")); + UNIT_ASSERT_EQUAL(point, (std::pair<double, double>(27.56, 53.90))); + + UNIT_ASSERT(TryPairFromString(point, "27.56@@53.90", "@@")); + UNIT_ASSERT_EQUAL(point, (std::pair<double, double>(27.56, 53.90))); + + UNIT_ASSERT(!TryPairFromString(point, "27.56@@53.90", "@")); + UNIT_ASSERT(!TryPairFromString(point, "")); + } + + Y_UNIT_TEST(TestVisibleMapBound) { + const double expectedLat = MercatorToLL(TMercatorPoint(0., LLToMercator(TGeoPoint(180., 0.)).X())).Lat(); + UNIT_ASSERT_DOUBLES_EQUAL(VISIBLE_LATITUDE_BOUND, expectedLat, 1.e-14); + } +} diff --git a/library/cpp/geo/ut/window_ut.cpp b/library/cpp/geo/ut/window_ut.cpp new file mode 100644 index 0000000000..194fb4e735 --- /dev/null +++ b/library/cpp/geo/ut/window_ut.cpp @@ -0,0 +1,547 @@ +#include "window.h" +#include <library/cpp/testing/unittest/registar.h> +#include <util/generic/ymath.h> + +using namespace NGeo; + +namespace { + constexpr double DEFAULT_EPS = 1.E-5; + + bool CheckGeoPointEqual(const TGeoPoint& found, const TGeoPoint& expected, const double eps = DEFAULT_EPS) { + if (std::isnan(found.Lon()) || std::isnan(found.Lat())) { + Cerr << "NaNs found: (" << found.Lon() << ", " << found.Lat() << ")" << Endl; + return false; + } + if (Abs(found.Lon() - expected.Lon()) > eps) { + Cerr << "longitude differs: " << found.Lon() << " found, " << expected.Lon() << " expected" << Endl; + return false; + } + if (Abs(found.Lat() - expected.Lat()) > eps) { + 
Cerr << "latitude differs: " << found.Lat() << " found, " << expected.Lat() << " expected" << Endl; + return false; + } + return true; + } + + bool CheckSizeEqual(const TSize& found, const TSize& expected, const double eps = DEFAULT_EPS) { + if (std::isnan(found.GetWidth()) || std::isnan(found.GetHeight())) { + Cerr << "NaNs found: (" << found.GetWidth() << ", " << found.GetHeight() << ")" << Endl; + return false; + } + if (Abs(found.GetWidth() - expected.GetWidth()) > eps) { + Cerr << "width differs: " << found.GetWidth() << " found, " << expected.GetWidth() << " expected" << Endl; + return false; + } + if (Abs(found.GetHeight() - expected.GetHeight()) > eps) { + Cerr << "height differs: " << found.GetHeight() << " found, " << expected.GetHeight() << " expected" << Endl; + return false; + } + return true; + } + + bool CheckGeoWindowEqual(const TGeoWindow& lhs, const TGeoWindow& rhs, const double eps = DEFAULT_EPS) { + return CheckGeoPointEqual(lhs.GetCenter(), rhs.GetCenter(), eps) && CheckSizeEqual(lhs.GetSize(), rhs.GetSize(), eps); + } +} // namespace + +/** + * TGeoWindow + */ +Y_UNIT_TEST_SUITE(TGeoWindowTest) { + Y_UNIT_TEST(TestParser) { + UNIT_ASSERT_EQUAL(TGeoWindow::ParseFromCornersPoints("1.23,5.67", "7.65,3.21"), + TGeoWindow(TGeoPoint(1.23, 3.21), TGeoPoint(7.65, 5.67))); + UNIT_ASSERT_EQUAL(TGeoWindow::ParseFromCornersPoints("1.23~5.67", "7.65~3.21", "~"), + TGeoWindow(TGeoPoint(1.23, 3.21), TGeoPoint(7.65, 5.67))); + UNIT_ASSERT_EXCEPTION(TGeoWindow::ParseFromCornersPoints("1.23~5.67", "7.65~3.21"), TBadCastException); + + UNIT_ASSERT(TGeoWindow::TryParseFromCornersPoints("1.23~5.67", "7.65~3.21").Empty()); + UNIT_ASSERT(TGeoWindow::TryParseFromCornersPoints("1.23,5.67", "7.65,3.21").Defined()); + UNIT_ASSERT_EQUAL(TGeoWindow::TryParseFromCornersPoints("1.23,5.67", "7.65,3.21").GetRef(), + TGeoWindow(TGeoPoint(1.23, 3.21), TGeoPoint(7.65, 5.67))); + UNIT_ASSERT(TGeoWindow::TryParseFromCornersPoints("1.23+++5.67+", "7.65+++3.21+", "+++").Empty()); + 
+ UNIT_ASSERT_EQUAL(TGeoWindow::ParseFromLlAndSpn("1.23,5.67", "0.1,0.2"), + TGeoWindow(TGeoPoint(1.23, 5.67), TSize(0.1, 0.2))); + UNIT_ASSERT_EQUAL(TGeoWindow::ParseFromLlAndSpn("1.23~5.67", "0.1~0.2", "~"), + TGeoWindow(TGeoPoint(1.23, 5.67), TSize(0.1, 0.2))); + UNIT_ASSERT_EXCEPTION(TGeoWindow::ParseFromLlAndSpn("1.23~5.67", "0.1~0.2"), TBadCastException); + UNIT_ASSERT(TGeoWindow::TryParseFromLlAndSpn("1.23~5.67", "0.1~0.2").Empty()); + UNIT_ASSERT(TGeoWindow::TryParseFromLlAndSpn("1.23~5.67", "0.1~0.2", "~").Defined()); + UNIT_ASSERT_EQUAL(TGeoWindow::TryParseFromLlAndSpn("1.23~5.67", "0.1~0.2", "~").GetRef(), + TGeoWindow(TGeoPoint(1.23, 5.67), TSize(0.1, 0.2))); + } + + Y_UNIT_TEST(TestConstructor) { + TGeoPoint center{55.50, 82.50}; + TSize size{5.00, 3.00}; + TGeoWindow window(center, size); + + UNIT_ASSERT_EQUAL(window.GetCenter(), center); + UNIT_ASSERT_EQUAL(window.GetSize(), size); + } + + Y_UNIT_TEST(TestPoles) { + { + TGeoWindow northPole{TGeoPoint{180., 90.}, TSize{1.5, 1.5}}; + UNIT_ASSERT(CheckGeoPointEqual(northPole.GetCenter(), TGeoPoint{180., 90.})); + UNIT_ASSERT(CheckGeoPointEqual(northPole.GetLowerLeftCorner(), TGeoPoint{179.25, 88.5})); + UNIT_ASSERT(CheckGeoPointEqual(northPole.GetUpperRightCorner(), TGeoPoint{180.75, 90.0})); + } + { + TGeoWindow tallWindow{TGeoPoint{37., 55.}, TSize{10., 180.}}; + UNIT_ASSERT(CheckGeoPointEqual(tallWindow.GetCenter(), TGeoPoint{37., 55.})); + UNIT_ASSERT(CheckGeoPointEqual(tallWindow.GetLowerLeftCorner(), TGeoPoint{32., -90.})); + UNIT_ASSERT(CheckGeoPointEqual(tallWindow.GetUpperRightCorner(), TGeoPoint{42., 90.})); + } + { + TGeoWindow world{TGeoPoint{0., 0.}, TSize{360., 180.}}; + UNIT_ASSERT(CheckGeoPointEqual(world.GetCenter(), TGeoPoint{0., 0.})); + UNIT_ASSERT(CheckGeoPointEqual(world.GetLowerLeftCorner(), TGeoPoint{-180., -90.})); + UNIT_ASSERT(CheckGeoPointEqual(world.GetUpperRightCorner(), TGeoPoint{180., 90.})); + } + { + TGeoWindow world{TGeoPoint{0., 0.}, TSize{360., 360.}}; + 
UNIT_ASSERT(CheckGeoPointEqual(world.GetCenter(), TGeoPoint{0., 0.})); + UNIT_ASSERT(CheckGeoPointEqual(world.GetLowerLeftCorner(), TGeoPoint{-180., -90.})); + UNIT_ASSERT(CheckGeoPointEqual(world.GetUpperRightCorner(), TGeoPoint{180., 90.})); + } + } + + Y_UNIT_TEST(TestBigSize) { + { + TGeoWindow w{TGeoPoint{37., 55.}, TSize{100., 179.}}; + UNIT_ASSERT(CheckGeoPointEqual(w.GetCenter(), TGeoPoint{37., 55.})); + UNIT_ASSERT(CheckGeoPointEqual(w.GetLowerLeftCorner(), TGeoPoint{-13., -89.09540675})); + UNIT_ASSERT(CheckGeoPointEqual(w.GetUpperRightCorner(), TGeoPoint{87., 89.90907637})); + } + } + + Y_UNIT_TEST(TestCenterWhenInitWithCorners) { + UNIT_ASSERT(CheckGeoPointEqual(TGeoWindow(TGeoPoint{5.00, 40.00}, TGeoPoint{25.00, 80.00}).GetCenter(), TGeoPoint{15.00, 67.17797})); + UNIT_ASSERT(CheckGeoPointEqual(TGeoWindow(TGeoPoint{-5.00, -40.00}, TGeoPoint{-25.00, -80.00}).GetCenter(), TGeoPoint{-15.00, -67.17797})); + } + + Y_UNIT_TEST(TestCornersWhenInitWithCenter) { + // check lat calc + UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{25.00, 50.00}, TSize{10.00, 10.00}).GetLowerLeftCorner().Lat(), 44.73927, DEFAULT_EPS); + + // lat equals to 90 + UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{25.00, 50.00}, TSize{10.00, 179.99999}).GetUpperRightCorner().Lat(), 90, DEFAULT_EPS); + + // lat equals to -90 + UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{25.00, -50.00}, TSize{10.00, -179.99999}).GetUpperRightCorner().Lat(), -90, DEFAULT_EPS); + + // check naive lon calc + UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{10, 10}, TSize{10, 5}).GetLowerLeftCorner().Lon(), 5, DEFAULT_EPS); + + // check lon equals to 190 (no wrapping) + UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{20, 0}, TSize{340, 5}).GetUpperRightCorner().Lon(), 190, DEFAULT_EPS); + + UNIT_ASSERT_DOUBLES_EQUAL(TGeoWindow(TGeoPoint{-40, 0}, TSize{-280, 5}).GetUpperRightCorner().Lon(), -180, DEFAULT_EPS); + + // naive calculating when point is (0, 0) + UNIT_ASSERT(CheckGeoPointEqual(TGeoWindow(TGeoPoint{0, 
0}, TSize{160, 160}).GetLowerLeftCorner(), TGeoPoint{-80, -80}, DEFAULT_EPS)); + UNIT_ASSERT(CheckGeoPointEqual(TGeoWindow(TGeoPoint{0, 0}, TSize{160, 160}).GetUpperRightCorner(), TGeoPoint{80, 80}, DEFAULT_EPS)); + } + + Y_UNIT_TEST(TestCenterSetter) { + TGeoPoint center{27.56, 53.90}; + TGeoWindow window{}; + window.SetCenter(center); + UNIT_ASSERT_EQUAL(window.GetCenter(), center); + } + + Y_UNIT_TEST(TestEqualOperator) { + TGeoWindow window{TGeoPoint{27.56, 53.90}, TGeoPoint{30.35, 56.89}}; + UNIT_ASSERT(window == window); + + TGeoWindow anotherWindow{TGeoPoint{60.10, 57.90}, TGeoPoint{60.70, 58.25}}; + UNIT_ASSERT(!(window == anotherWindow)); + } + + Y_UNIT_TEST(TestAssignmentOperator) { + TGeoWindow lhs{TGeoPoint{27.56, 53.90}, TGeoPoint{30.35, 53.89}}; + TGeoWindow rhs{}; + rhs = lhs; + UNIT_ASSERT_EQUAL(lhs, rhs); + } + + Y_UNIT_TEST(TestContainsMethod) { + // you could see cases here https://tech.yandex.ru/maps/jsbox/2.1/rectangle + // (pay attention that the first coord is lat and the second one is lon) + TGeoWindow window{TGeoPoint{27.45, 53.82}, TGeoPoint{27.65, 53.97}}; + + // point is inside the window + UNIT_ASSERT(window.Contains(TGeoPoint{27.55, 53.90})); + + // point is to the right of the window + UNIT_ASSERT(!window.Contains(TGeoPoint{27.66, 53.95})); + + // point is to the left of the window + UNIT_ASSERT(!window.Contains(TGeoPoint{27.44, 53.95})); + + // point is under the window + UNIT_ASSERT(!window.Contains(TGeoPoint{27.50, 53.81})); + + // point is above the window + UNIT_ASSERT(!window.Contains(TGeoPoint{27.50, 53.98})); + + // point is on border + UNIT_ASSERT(window.Contains(TGeoPoint{27.45, 53.86})); + UNIT_ASSERT(window.Contains(TGeoPoint{27.65, 53.86})); + UNIT_ASSERT(window.Contains(TGeoPoint{27.55, 53.82})); + UNIT_ASSERT(window.Contains(TGeoPoint{27.55, 53.97})); + + // negate coord + UNIT_ASSERT(TGeoWindow(TGeoPoint{-72.17, -38.82}, TGeoPoint{-68.95, -36.70}).Contains(TGeoPoint{-70.40, -37.75})); + + // special cases + 
UNIT_ASSERT(!TGeoWindow{}.Contains(TGeoPoint{60.09, 57.90})); + + UNIT_ASSERT(TGeoWindow(TGeoPoint{}, TGeoPoint{27.55, 53.90}).Contains(TGeoPoint{27.55, 53.90})); + UNIT_ASSERT(TGeoWindow(TGeoPoint{27.55, 53.90}, TGeoPoint{}).Contains(TGeoPoint{27.55, 53.90})); + } + + Y_UNIT_TEST(TestIntersectsMethod) { + // intersect only by lat + UNIT_ASSERT( + !Intersects( + TGeoWindow{TGeoPoint{27.60, 53.90}, TGeoPoint{27.80, 53.95}}, + TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}})); + + // intersect only by lon + UNIT_ASSERT( + !Intersects( + TGeoWindow{TGeoPoint{27.35, 54}, TGeoPoint{27.45, 54.10}}, + TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}})); + + // one inside another + UNIT_ASSERT( + Intersects( + TGeoWindow{TGeoPoint{27.35, 53.90}, TGeoPoint{27.45, 53.95}}, + TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}})); + + // intersection is point + UNIT_ASSERT( + !Intersects( + TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.70, 54.00}}, + TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}})); + + // intersection is segment + UNIT_ASSERT( + !Intersects( + TGeoWindow{TGeoPoint{27.40, 53.98}, TGeoPoint{27.70, 54.00}}, + TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}})); + + // intersection is area + UNIT_ASSERT( + Intersects( + TGeoWindow{TGeoPoint{27.40, 53.90}, TGeoPoint{27.70, 54.00}}, + TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}})); + + // equal windows + TGeoWindow window{TGeoPoint{27.60, 53.88}, TGeoPoint{27.80, 53.98}}; + UNIT_ASSERT(Intersects(window, window)); + } + + Y_UNIT_TEST(TestIntersectionMethod) { + // non-intersecting window + UNIT_ASSERT( + !(Intersection( + TGeoWindow{TGeoPoint{37.66, 55.66}, TGeoPoint{37.53, 55.64}}, + TGeoWindow{TGeoPoint{37.67, 55.66}, TGeoPoint{37.69, 55.71}}))); + + // one inside another + UNIT_ASSERT(CheckGeoWindowEqual( + Intersection( + TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{10.00, 10.00}}, + TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{2.00, 2.00}}) 
+ .GetRef(), + (TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{2.00, 2.00}}))); + + // cross + UNIT_ASSERT(CheckGeoWindowEqual( + Intersection( + TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{10.00, 2.00}}, + TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{2.00, 10.00}}) + .GetRef(), + (TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{2.00, 2.00}}))); + + // intersection is a point + UNIT_ASSERT(CheckGeoWindowEqual( + Intersection( + TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.70, 54.00}}, + TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}}) + .GetRef(), + (TGeoWindow{TGeoPoint{27.50, 53.98}, TSize{0, 0}}))); + + // intersection is a segment + UNIT_ASSERT(CheckGeoWindowEqual( + Intersection( + TGeoWindow{TGeoPoint{27.40, 53.98}, TGeoPoint{27.70, 54.00}}, + TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}}) + .GetRef(), + (TGeoWindow{TGeoPoint{27.45, 53.98}, TSize{0.10, 0}}))); + + // intersection is area + UNIT_ASSERT(CheckGeoWindowEqual( + Intersection( + TGeoWindow{TGeoPoint{27.40, 53.90}, TGeoPoint{27.70, 54.00}}, + TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}}) + .GetRef(), + (TGeoWindow{TGeoPoint{27.40, 53.90}, TGeoPoint{27.50, 53.98}}))); + + // special cases + UNIT_ASSERT( + !(Intersection( + TGeoWindow{TGeoPoint{27.30, 53.88}, TGeoPoint{27.50, 53.98}}, + TGeoWindow{}))); + } + + Y_UNIT_TEST(TestDistanceMethod) { + // one window inside another + UNIT_ASSERT_DOUBLES_EQUAL( + (TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.80, 54.10}}) + .Distance(TGeoWindow{TGeoPoint{27.55, 54.00}, TGeoPoint{27.70, 54.07}}), + 0, + 1.E-5); + + // gap only by lon + UNIT_ASSERT_DOUBLES_EQUAL( + (TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.60, 54.10}}) + .Distance(TGeoWindow{TGeoPoint{27.69, 54.10}, TGeoPoint{27.90, 54.20}}), + 0.052773, + 1.E-5); + + // gap only by lat + UNIT_ASSERT_DOUBLES_EQUAL( + (TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.60, 54.10}}) + .Distance(TGeoWindow{TGeoPoint{27.50, 54.20}, TGeoPoint{27.70, 54.30}}), + 0.1, + 1.E-5); 
+ + // gap by lot and lat, you can calculate answer using two previous tests + UNIT_ASSERT_DOUBLES_EQUAL( + (TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.60, 54.10}} + .Distance(TGeoWindow{TGeoPoint{27.69, 54.20}, TGeoPoint{27.70, 54.30}})), + 0.11304, + 1.E-5); + + // negate coord + UNIT_ASSERT_DOUBLES_EQUAL( + (TGeoWindow{TGeoPoint{-27.50, -53.98}, TGeoPoint{-27.60, -54.10}} + .Distance(TGeoWindow{TGeoPoint{-27.69, -54.20}, TGeoPoint{-27.70, -54.30}})), + 0.11304, + 1.E-5); + } + + Y_UNIT_TEST(TestApproxDistanceMethod) { + // point inside + UNIT_ASSERT_DOUBLES_EQUAL( + (TGeoWindow{TGeoPoint{27.50, 53.98}, TGeoPoint{27.80, 54.10}}) + .GetApproxDistance(TGeoPoint{27.60, 54.05}), + 0, + 1.E-5); + + // gap only by lon + UNIT_ASSERT_DOUBLES_EQUAL( + (TGeoWindow{TGeoPoint{27.50, 54.00}, TGeoPoint{27.60, 54.10}}) + .GetApproxDistance(TGeoPoint{27.70, 54.05}), + 6535.3, + 0.1); + + // gap only by lat + UNIT_ASSERT_DOUBLES_EQUAL( + (TGeoWindow{TGeoPoint{27.50, 54.00}, TGeoPoint{27.60, 54.10}}) + .GetApproxDistance(TGeoPoint{27.55, 53.95}), + 5566.0, + 0.1); + + // gap by lot and lat + UNIT_ASSERT_DOUBLES_EQUAL( + (TGeoWindow{TGeoPoint{27.50, 54.00}, TGeoPoint{27.60, 54.10}}) + .GetApproxDistance(TGeoPoint{27.70, 54.20}), + 12900.6, + 0.1); + + // negate coord + UNIT_ASSERT_DOUBLES_EQUAL( + (TGeoWindow{TGeoPoint{-27.50, -54.00}, TGeoPoint{-27.60, -54.10}}) + .GetApproxDistance(TGeoPoint{-27.70, -54.20}), + 12900.6, + 0.1); + } + + Y_UNIT_TEST(TestUnionMethod) { + // one inside another + UNIT_ASSERT(CheckGeoWindowEqual( + Union( + TGeoWindow{TGeoPoint{37.00, 55.00}, TSize{2.00, 3.00}}, + TGeoWindow{TGeoPoint{37.10, 55.20}, TSize{1.50, 1.00}}), + TGeoWindow(TGeoPoint{37.00, 55.00}, TSize{2.00, 3.00}))); + + // non-intersecting windows + UNIT_ASSERT(CheckGeoWindowEqual( + Union( + TGeoWindow{TGeoPoint{37.00, 55.00}, TGeoPoint{37.10, 55.10}}, + TGeoWindow{TGeoPoint{37.20, 55.20}, TGeoPoint{37.30, 55.30}}), + TGeoWindow(TGeoPoint{37.00, 55.00}, TGeoPoint{37.30, 55.30}))); + 
+ // negate coords, one inside another + UNIT_ASSERT(CheckGeoWindowEqual( + Union( + TGeoWindow{TGeoPoint{-57.62, -20.64}, TSize{2.00, 4.00}}, + TGeoWindow{TGeoPoint{-57.62, -20.64}, TSize{12.00, 10.00}}), + TGeoWindow(TGeoPoint{-57.62, -20.64}, TSize{12.00, 10.00}), 1.E-2)); + + // cross + UNIT_ASSERT(CheckGeoWindowEqual( + Union( + TGeoWindow{TGeoPoint{-3.82, 5.52}, TGeoPoint{0.10, 6.50}}, + TGeoWindow{TGeoPoint{-1.5, 4.20}, TGeoPoint{-0.5, 7.13}}), + TGeoWindow(TGeoPoint{-3.82, 4.20}, TGeoPoint{0.10, 7.13}))); + + // special cases + UNIT_ASSERT(CheckGeoWindowEqual( + Union( + TGeoWindow{TGeoPoint{-3.82, 5.52}, TGeoPoint{0.10, 6.50}}, + TGeoWindow{}), + TGeoWindow(TGeoPoint{-3.82, 5.52}, TGeoPoint{361., 181.}))); + + UNIT_ASSERT(CheckGeoWindowEqual( + Union( + TGeoWindow{}, + TGeoWindow{TGeoPoint{-3.82, 5.52}, TGeoPoint{0.10, 6.50}}), + TGeoWindow(TGeoPoint{-3.82, 5.52}, TGeoPoint{361., 181.}))); + } + + Y_UNIT_TEST(TestStretchMethod) { + TSize size{0.5, 1}; + TGeoPoint center{27.40, 53.90}; + TGeoWindow window{}; + double multiplier = 0; + + // multiplier is less than 1. + window = {center, size}; + multiplier = 0.5; + + UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{27.14999, 53.39699})); + UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{27.65000, 54.39699})); + + window.Stretch(multiplier); + UNIT_ASSERT(CheckGeoWindowEqual(window, TGeoWindow{center, TSize{0.25, 0.5}})); + UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{27.27499, 53.64925})); + UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{27.52500, 54.14924})); + + // multiplier is greater than 1. 
+ window = {center, size}; + multiplier = 2.2; + + window.Stretch(multiplier); + UNIT_ASSERT(CheckGeoWindowEqual(window, TGeoWindow{center, TSize{1.1, 2.2}})); + UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{26.84999, 52.78545})); + UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{27.95000, 54.98545})); + + // invalid multiplier + window = {center, size}; + multiplier = 100.; + + window.Stretch(multiplier); + UNIT_ASSERT(CheckGeoWindowEqual(window, TGeoWindow{center, TSize{50, 100}})); + UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{2.40000, -18.88352})); + UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{52.39999, 81.26212})); + + // invalid multiplier + window = {center, size}; + multiplier = 0; + + window.Stretch(multiplier); + UNIT_ASSERT(CheckGeoWindowEqual(window, TGeoWindow{center, TSize{0, 0}})); + UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{27.39999, 53.90000})); + UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{27.39999, 53.90000})); + + // invalid multiplier + window = {center, size}; + multiplier = -5.; + + window.Stretch(multiplier); + UNIT_ASSERT(CheckGeoWindowEqual(window, TGeoWindow{center, TSize{-2.5, -5}})); + UNIT_ASSERT(CheckGeoPointEqual(window.GetLowerLeftCorner(), TGeoPoint{28.64999, 56.32495})); + UNIT_ASSERT(CheckGeoPointEqual(window.GetUpperRightCorner(), TGeoPoint{26.15000, 51.32491})); + } +} + +/** + * TMercatorWindow + */ +Y_UNIT_TEST_SUITE(TMercatorWindowTest) { + Y_UNIT_TEST(TestConstructor) { + // init with two corners + TMercatorPoint lowerLeft{5, 3}; + TMercatorPoint upperRight{10, 20}; + TMercatorWindow window{lowerLeft, upperRight}; + + UNIT_ASSERT_EQUAL(window.GetWidth(), 5.); + UNIT_ASSERT_EQUAL(window.GetHeight(), 17.); + UNIT_ASSERT_EQUAL(window.GetCenter(), (TMercatorPoint{7.5, 11.5})); + + TMercatorPoint center{8, 12}; + TSize size{5, 17}; + window = {center, size}; + 
UNIT_ASSERT_EQUAL(window.GetUpperRightCorner().X(), 10.5); + UNIT_ASSERT_EQUAL(window.GetUpperRightCorner().Y(), 20.5); + UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner().X(), 5.5); + UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner().Y(), 3.5); + } + + Y_UNIT_TEST(TestInflateMethod) { + TSize size{200, 500}; + TMercatorPoint center{441, 688}; + TMercatorWindow window{}; + int add = 10; + + window = {center, size}; + UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(341, 438)); + UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(541, 938)); + window.Inflate(add); + UNIT_ASSERT_EQUAL(window, TMercatorWindow(center, TSize{220, 520})); + UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(331, 428)); + UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(551, 948)); + + // negate coords + center = {-441, -688}; + window = {center, size}; + UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(-541, -938)); + UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(-341, -438)); + window.Inflate(add); + UNIT_ASSERT_EQUAL(window, TMercatorWindow(center, TSize{220, 520})); + UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(-551, -948)); + UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(-331, -428)); + + // size becomes negate + size = {6, 12}; + center = {0, 0}; + window = {center, size}; + UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(-3, -6)); + UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(3, 6)); + + add = -20; + window.Inflate(add); + UNIT_ASSERT_EQUAL(window, TMercatorWindow(center, TSize{-34, -28})); + UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(17, 14)); + UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(-17, -14)); + UNIT_ASSERT_EQUAL(window.GetSize(), TSize(-34, -28)); + + // big add param + size = {10, 15}; + center = {5, 10}; + window = {center, size}; + + add = static_cast<int>(1E5); + window.Inflate(add); + 
UNIT_ASSERT_EQUAL(window, TMercatorWindow(center, TSize{200'010, 200'015})); + UNIT_ASSERT_EQUAL(window.GetLowerLeftCorner(), TMercatorPoint(-100'000, -99'997.5)); + UNIT_ASSERT_EQUAL(window.GetUpperRightCorner(), TMercatorPoint(100'010, 100'017.5)); + } +} diff --git a/library/cpp/geo/ut/ya.make b/library/cpp/geo/ut/ya.make new file mode 100644 index 0000000000..5bd891db1f --- /dev/null +++ b/library/cpp/geo/ut/ya.make @@ -0,0 +1,12 @@ +UNITTEST_FOR(library/cpp/geo) + +SRCS( + load_save_helper_ut.cpp + polygon_ut.cpp + point_ut.cpp + size_ut.cpp + util_ut.cpp + window_ut.cpp +) + +END() diff --git a/library/cpp/geo/util.cpp b/library/cpp/geo/util.cpp new file mode 100644 index 0000000000..e8d0fc378e --- /dev/null +++ b/library/cpp/geo/util.cpp @@ -0,0 +1,34 @@ +#include "util.h" + +#include <math.h> +#include <util/generic/cast.h> +#include <util/generic/string.h> +#include <util/string/cast.h> +#include <utility> + +namespace NGeo { + bool TryPairFromString(std::pair<double, double>& res, TStringBuf inputStr, TStringBuf delimiter) { + TStringBuf lhsStr; + TStringBuf rhsStr; + + double lhs = NAN; + double rhs = NAN; + if ( + !inputStr.TrySplit(delimiter, lhsStr, rhsStr) || + !TryFromString<double>(lhsStr, lhs) || + !TryFromString<double>(rhsStr, rhs)) { + return false; + } + + res = {lhs, rhs}; + return true; + } + + std::pair<double, double> PairFromString(TStringBuf inputStr, TStringBuf delimiter) { + std::pair<double, double> res; + if (!TryPairFromString(res, inputStr, delimiter)) { + ythrow TBadCastException() << "Wrong point string: " << inputStr; + } + return res; + } +} // namespace NGeo diff --git a/library/cpp/geo/util.h b/library/cpp/geo/util.h new file mode 100644 index 0000000000..18b411e6a4 --- /dev/null +++ b/library/cpp/geo/util.h @@ -0,0 +1,107 @@ +#pragma once + +#include "point.h" +#include "size.h" +#include "window.h" + +#include <util/generic/ymath.h> + +namespace NGeo { + constexpr double MIN_LATITUDE = -90.; + constexpr double MAX_LATITUDE 
= +90.; + constexpr double MIN_LONGITUDE = -180.; + constexpr double MAX_LONGITUDE = +180.; + constexpr double WORLD_WIDTH = MAX_LONGITUDE - MIN_LONGITUDE; + constexpr double WORLD_HEIGHT = MAX_LATITUDE - MIN_LATITUDE; + + // The Mercator projection is truncated at certain latitude so that the visible world forms a square. The poles are not shown. + constexpr double VISIBLE_LATITUDE_BOUND = 85.084059050109785; + + inline double Deg2rad(double d) { + return d * PI / 180; + } + + inline double Rad2deg(double d) { + return d * 180 / PI; + } + + inline double GetLongitudeFromMetersAtEquator(double meters) { + return Rad2deg(meters * (1. / WGS84::R)); + } + + inline double GetMetersFromDeg(double angle) { + return Deg2rad(angle) * NGeo::WGS84::R; + } + + inline double GetLatCos(double latDegree) { + return cos(Deg2rad(latDegree)); + } + + /** + * Get Inversed cosinus of latitude + * It is more precise, than division of two big doubles + * It is safe for lattitue at 90 degrees + */ + inline double GetInversedLatCosSafe(double latDegree) { + return 1. 
/ Max(0.001, cos(Deg2rad(latDegree))); + } + + /** + * Gets Lontitude width for given width at equator and latitude + */ + inline double GetWidthAtLatitude(double widthEquator, double latDegree) { + return widthEquator * GetInversedLatCosSafe(latDegree); + } + + inline double GetWidthAtLatitude(double widthEquator, const TGeoPoint& p) { + return GetWidthAtLatitude(widthEquator, p.Lat()); + } + + /* + * Returns Normalised width at equator for specified width at latitude and latitude + */ + + inline double GetWidthAtEquator(double widthAtLatitude, double latDegree) { + return widthAtLatitude * GetLatCos(latDegree); + } + + inline double GetWidthAtEquator(double widthAtLatitude, const TGeoPoint& p) { + return GetWidthAtEquator(widthAtLatitude, p.Lat()); + } + + /* + * Same for size + */ + + inline TSize GetSizeAtLatitude(const TSize& sizeAtEquator, const TGeoPoint& at) { + return TSize(GetWidthAtLatitude(sizeAtEquator.GetWidth(), at), sizeAtEquator.GetHeight()); + } + + inline TSize GetSizeAtEquator(const TSize& sizeAtLatitude, const TGeoPoint& at) { + return TSize(GetWidthAtEquator(sizeAtLatitude.GetWidth(), at), sizeAtLatitude.GetHeight()); + } + + inline TGeoWindow ConstructWindowFromEquatorSize(const TGeoPoint& center, const TSize& sizeAtEquator) { + return TGeoWindow(center, GetSizeAtLatitude(sizeAtEquator, center)); + } + + inline double SquaredDiagonal(const NGeo::TSize& size, double latitude) { + return Sqr(NGeo::GetWidthAtEquator(size.GetWidth(), latitude)) + Sqr(size.GetHeight()); + } + + inline double Diagonal(const NGeo::TSize& size, double latitude) { + return sqrt(SquaredDiagonal(size, latitude)); + } + + /** + * try to parse two coords from string + * return pair of coords on success, otherwise throw exception + */ + std::pair<double, double> PairFromString(TStringBuf inputStr, TStringBuf delimiter = TStringBuf(",")); + + /** + * try to parse two coords from string + * write result to first param and return true on success, otherwise return false + */ + 
bool TryPairFromString(std::pair<double, double>& res, TStringBuf inputStr, TStringBuf delimiter = TStringBuf(",")); +} // namespace NGeo diff --git a/library/cpp/geo/window.cpp b/library/cpp/geo/window.cpp new file mode 100644 index 0000000000..2ad2b61b71 --- /dev/null +++ b/library/cpp/geo/window.cpp @@ -0,0 +1,297 @@ +#include "window.h" + +#include "util.h" + +#include <util/generic/ylimits.h> +#include <util/generic/ymath.h> +#include <util/generic/maybe.h> + +#include <cstdlib> +#include <utility> + +namespace NGeo { + namespace { + TMercatorPoint GetMiddlePoint(const TMercatorPoint& p1, const TMercatorPoint& p2) { + return TMercatorPoint{(p1.X() + p2.X()) / 2, (p1.Y() + p2.Y()) / 2}; + } + + struct TLatBounds { + double LatMin; + double LatMax; + }; + } // namespace + + bool TrySpan2LatitudeDegenerateCases(double ll, double lspan, TLatBounds& result) { + // TODO(sobols@): Compare with eps? + if (Y_UNLIKELY(lspan >= 180.)) { + result.LatMin = -90.; + result.LatMax = +90.; + return true; + } + if (Y_UNLIKELY(ll == +90.)) { + result.LatMin = ll - lspan; + result.LatMax = ll; + return true; + } + if (Y_UNLIKELY(ll == -90.)) { + result.LatMin = ll; + result.LatMax = ll + lspan; + return true; + } + return false; + } + + /** + * Finds such latitudes lmin, lmax that: + * 1) lmin <= ll <= lmax, + * 2) lmax - lmin == lspan, + * 3) MercatorY(ll) - MercatorY(lmin) == MercatorY(lmax) - MercatorY(ll) + * (the ll parallel is a center between lmin and lmax parallels in Mercator projection) + * + * \returns a pair (lmin, lmax) + */ + TLatBounds Span2Latitude(double ll, double lspan) { + TLatBounds result{}; + if (TrySpan2LatitudeDegenerateCases(ll, lspan, result)) { + return result; + } + + const double lc = Deg2rad(ll); + const double h = Deg2rad(lspan); + + // Spherical (Pseudo) Mercator: + // MercatorY(lc) = R * ln(tan(lc / 2 + PI / 4)). 
+ // Note that + // ln(a) - ln(b) = ln(a / b) + // That's why + // MercatorY(lc) - MercatorY(lmin) == MercatorY(lmin + h) - MercatorY(lc) <=> + // <=> tan(lc / 2 + PI / 4) / tan(lmin / 2 + PI / 4) == + // == tan(lmin / 2 + h / 2 + PI / 4) / tan(lc / 2 + PI / 4). + // Also note that + // tan(x + y) == (tan(x) + tan(y)) / (1 - tan(x) * tan(y)), + // so + // tan(lmin / 2 + h / 2 + PI / 4) == + // == (tan(lmin / 2 + PI / 4) + tan(h / 2)) / (1 - tan(lmin / 2 + PI / 4) * tan(h / 2)) + + const double yx = tan(lc / 2 + PI / 4); + + // Let x be tan(lmin / 2 + PI / 4), + // then + // yx / x == (x + tan(h / 2)) / ((1 - x * tan(h / 2)) * yx), + // or + // yx^2 * (1 - x * tan(h / 2)) == (x + tan(h / 2)) * x. + // Now we solve a quadratic equation: + // x^2 + bx + c == 0 + + const double C = yx * yx; + + const double b = (C + 1) * tan(h / 2), c = -C; + const double D = b * b - 4 * c; + const double root = (-b + sqrt(D)) / 2; + + result.LatMin = Rad2deg((atan(root) - PI / 4) * 2); + result.LatMax = result.LatMin + lspan; + return result; + } + + void TGeoWindow::CalcCorners() { + if (!IsValid()) { + return; + } + const TLatBounds latBounds = Span2Latitude(Center_.Lat(), Size_.GetHeight()); + + if (-90. < latBounds.LatMin && latBounds.LatMax < +90.) 
{ + TMercatorPoint lowerLeftCornerM = LLToMercator(TGeoPoint(Center_.Lon() - (Size_.GetWidth() / 2), latBounds.LatMin)); + TMercatorPoint upperRightCornerM = LLToMercator(TGeoPoint(Center_.Lon() + (Size_.GetWidth() / 2), latBounds.LatMax)); + TMercatorPoint centerM = LLToMercator(Center_); + + double w = upperRightCornerM.X() - lowerLeftCornerM.X(); + double h = upperRightCornerM.Y() - lowerLeftCornerM.Y(); + + LowerLeftCorner_ = MercatorToLL(TMercatorPoint(centerM.X() - w / 2, centerM.Y() - h / 2)); + UpperRightCorner_ = MercatorToLL(TMercatorPoint(centerM.X() + w / 2, centerM.Y() + h / 2)); + } else { + LowerLeftCorner_ = TGeoPoint(Center_.Lon() - (Size_.GetWidth() / 2), latBounds.LatMin); + UpperRightCorner_ = TGeoPoint(Center_.Lon() + (Size_.GetWidth() / 2), latBounds.LatMax); + } + } + + void TGeoWindow::CalcCenterAndSpan() { + if (!LowerLeftCorner_ || !UpperRightCorner_) { + return; + } + + TMercatorPoint lower = LLToMercator(LowerLeftCorner_); + TMercatorPoint upper = LLToMercator(UpperRightCorner_); + TMercatorPoint center = GetMiddlePoint(lower, upper); + Center_ = MercatorToLL(center); + + Size_ = TSize(UpperRightCorner_.Lon() - LowerLeftCorner_.Lon(), + UpperRightCorner_.Lat() - LowerLeftCorner_.Lat()); + } + + bool TGeoWindow::Contains(const TGeoPoint& p) const { + return LowerLeftCorner_.Lon() <= p.Lon() && p.Lon() <= UpperRightCorner_.Lon() && + LowerLeftCorner_.Lat() <= p.Lat() && p.Lat() <= UpperRightCorner_.Lat(); + } + + double TGeoWindow::Diameter() const { + return Diagonal(Size_, Center_.Lat()); + } + + double TGeoWindow::Distance(const TGeoWindow& w) const { + const double minX = Max(GetLowerLeftCorner().Lon(), w.GetLowerLeftCorner().Lon()); + const double maxX = Min(GetUpperRightCorner().Lon(), w.GetUpperRightCorner().Lon()); + const double minY = Max(GetLowerLeftCorner().Lat(), w.GetLowerLeftCorner().Lat()); + const double maxY = Min(GetUpperRightCorner().Lat(), w.GetUpperRightCorner().Lat()); + double xGap = minX > maxX ? 
(minX - maxX) : 0.; + double yGap = minY > maxY ? (minY - maxY) : 0.; + return sqrt(Sqr(xGap * cos((minY + maxY) * 0.5 * PI / 180)) + Sqr(yGap)); // double-precision sqrt: sqrtf would round the result through float + } + + double TWindowLL::GetApproxDistance(const TPointLL& point) const { + const double metresInDegree = WGS84::R * PI / 180; + return Distance(TWindowLL{point, point}) * metresInDegree; + } + + TGeoWindow TGeoWindow::ParseFromCornersPoints(TStringBuf leftCornerStr, TStringBuf rightCornerStr, TStringBuf delimiter) { + auto leftCorner = TGeoPoint::Parse(leftCornerStr, delimiter); + auto rightCorner = TGeoPoint::Parse(rightCornerStr, delimiter); + + return {leftCorner, rightCorner}; + } + + TMaybe<TGeoWindow> TGeoWindow::TryParseFromCornersPoints(TStringBuf leftCornerStr, TStringBuf rightCornerStr, TStringBuf delimiter) { + auto leftCorner = TGeoPoint::TryParse(leftCornerStr, delimiter); + auto rightCorner = TGeoPoint::TryParse(rightCornerStr, delimiter); + if (!leftCorner || !rightCorner) { + return {}; + } + + return TGeoWindow{*leftCorner, *rightCorner}; + } + + TGeoWindow TGeoWindow::ParseFromLlAndSpn(TStringBuf llStr, TStringBuf spnStr, TStringBuf delimiter) { + TGeoPoint ll = TGeoPoint::Parse(llStr, delimiter); + TSize spn = TSize::Parse(spnStr, delimiter); + + return {ll, spn}; + } + + TMaybe<TGeoWindow> TGeoWindow::TryParseFromLlAndSpn(TStringBuf llStr, TStringBuf spnStr, TStringBuf delimiter) { + auto ll = TGeoPoint::TryParse(llStr, delimiter); + auto spn = TSize::TryParse(spnStr, delimiter); + + if (!ll || !spn) { + return {}; + } + + return TGeoWindow{*ll, *spn}; + } + /** + * TMercatorWindow + */ + + TMercatorWindow::TMercatorWindow() noexcept + : HalfWidth_{std::numeric_limits<double>::quiet_NaN()} + , HalfHeight_{std::numeric_limits<double>::quiet_NaN()} + { + } + + TMercatorWindow::TMercatorWindow(const TMercatorPoint& center, const TSize& size) noexcept + : Center_{center} + , HalfWidth_{size.GetWidth() / 2} + , HalfHeight_{size.GetHeight() / 2} + { + } + + TMercatorWindow::TMercatorWindow(const 
TMercatorPoint& firstPoint, const TMercatorPoint& secondPoint) noexcept + : Center_{GetMiddlePoint(firstPoint, secondPoint)} + , HalfWidth_{Abs(secondPoint.X() - firstPoint.X()) / 2} + , HalfHeight_{Abs(secondPoint.Y() - firstPoint.Y()) / 2} + { + } + + bool TMercatorWindow::Contains(const TMercatorPoint& pt) const noexcept { + return (Center_.X() - HalfWidth_ <= pt.X()) && + (pt.X() <= Center_.X() + HalfWidth_) && + (Center_.Y() - HalfHeight_ <= pt.Y()) && + (pt.Y() <= Center_.Y() + HalfHeight_); + } + + /** + * Conversion + */ + + TMercatorWindow LLToMercator(const TGeoWindow& window) { + return TMercatorWindow{LLToMercator(window.GetLowerLeftCorner()), LLToMercator(window.GetUpperRightCorner())}; + } + + TGeoWindow MercatorToLL(const TMercatorWindow& window) { + return TGeoWindow{MercatorToLL(window.GetLowerLeftCorner()), MercatorToLL(window.GetUpperRightCorner())}; + } + + /** + * Operators + */ + + TMaybe<TGeoWindow> Intersection(const TGeoWindow& lhs, const TGeoWindow& rhs) { + const double minX = Max(lhs.GetLowerLeftCorner().Lon(), rhs.GetLowerLeftCorner().Lon()); + const double maxX = Min(lhs.GetUpperRightCorner().Lon(), rhs.GetUpperRightCorner().Lon()); + const double minY = Max(lhs.GetLowerLeftCorner().Lat(), rhs.GetLowerLeftCorner().Lat()); + const double maxY = Min(lhs.GetUpperRightCorner().Lat(), rhs.GetUpperRightCorner().Lat()); + if (minX > maxX || minY > maxY) { + return {}; + } + return TGeoWindow(TGeoPoint(minX, minY), TGeoPoint(maxX, maxY)); + } + + TMaybe<TGeoWindow> Intersection(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs) { + if (!lhs || !rhs) { + return {}; + } + return Intersection(*lhs, *rhs); + } + + TGeoWindow Union(const TGeoWindow& lhs, const TGeoWindow& rhs) { + const double minX = Min(lhs.GetLowerLeftCorner().Lon(), rhs.GetLowerLeftCorner().Lon()); + const double maxX = Max(lhs.GetUpperRightCorner().Lon(), rhs.GetUpperRightCorner().Lon()); + const double minY = Min(lhs.GetLowerLeftCorner().Lat(), 
rhs.GetLowerLeftCorner().Lat()); + const double maxY = Max(lhs.GetUpperRightCorner().Lat(), rhs.GetUpperRightCorner().Lat()); + return TGeoWindow{TGeoPoint{minX, minY}, TGeoPoint{maxX, maxY}}; + } + + TMaybe<TGeoWindow> Union(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs) { + if (!lhs) { + return rhs; + } + if (!rhs) { + return lhs; + } + return Union(*lhs, *rhs); + } + + bool Contains(const TMaybe<TGeoWindow>& window, const TGeoPoint& point) { + if (!window) { + return false; + } + return window.GetRef().Contains(point); + } + + bool Intersects(const TGeoWindow& lhs, const TGeoWindow& rhs) { + bool haveHorizIntersection = + !(lhs.GetUpperRightCorner().Lon() <= rhs.GetLowerLeftCorner().Lon() || + rhs.GetUpperRightCorner().Lon() <= lhs.GetLowerLeftCorner().Lon()); + bool haveVertIntersection = + !(lhs.GetUpperRightCorner().Lat() <= rhs.GetLowerLeftCorner().Lat() || + rhs.GetUpperRightCorner().Lat() <= lhs.GetLowerLeftCorner().Lat()); + return haveHorizIntersection && haveVertIntersection; + } + + bool Intersects(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs) { + if (!lhs || !rhs) { + return false; + } + return Intersects(*lhs, *rhs); + } +} // namespace NGeo diff --git a/library/cpp/geo/window.h b/library/cpp/geo/window.h new file mode 100644 index 0000000000..1205d8351b --- /dev/null +++ b/library/cpp/geo/window.h @@ -0,0 +1,264 @@ +#pragma once + +#include "point.h" +#include "size.h" +#include <util/generic/string.h> +#include <util/generic/yexception.h> +#include <util/string/cast.h> +#include <util/generic/maybe.h> + +#include <algorithm> + +namespace NGeo { + class TGeoWindow { + public: + TGeoWindow() noexcept + + = default; + + TGeoWindow(const TGeoPoint& center, const TSize& size) noexcept + : Center_(center) + , Size_(size) + { + CalcCorners(); + } + + TGeoWindow(const TGeoPoint& firstPoint, const TGeoPoint& secondPoint) noexcept + : LowerLeftCorner_{std::min(firstPoint.Lon(), secondPoint.Lon()), + std::min(firstPoint.Lat(), 
secondPoint.Lat())} + , UpperRightCorner_{std::max(firstPoint.Lon(), secondPoint.Lon()), + std::max(firstPoint.Lat(), secondPoint.Lat())} + { + CalcCenterAndSpan(); + } + + const TGeoPoint& GetCenter() const noexcept { + return Center_; + } + + void SetCenter(const TGeoPoint& newCenter) { + Center_ = newCenter; + CalcCorners(); + } + + const TSize& GetSize() const noexcept { + return Size_; + } + + void SetSize(const TSize& newSize) { + Size_ = newSize; + CalcCorners(); + } + + const TGeoPoint& GetLowerLeftCorner() const noexcept { + return LowerLeftCorner_; + } + + const TGeoPoint& GetUpperRightCorner() const noexcept { + return UpperRightCorner_; + } + + void swap(TGeoWindow& o) noexcept { + Center_.swap(o.Center_); + Size_.swap(o.Size_); + LowerLeftCorner_.swap(o.LowerLeftCorner_); + UpperRightCorner_.swap(o.UpperRightCorner_); + } + + bool IsValid() const noexcept { + return Center_.IsValid() && Size_.IsValid(); + } + + bool Contains(const TGeoPoint&) const; + + bool Contains(const TGeoWindow& w) const { + return Contains(w.LowerLeftCorner_) && Contains(w.UpperRightCorner_); + } + + void Stretch(double multiplier) { + Size_.Stretch(multiplier); + CalcCorners(); + } + + void Inflate(double additionX, double additionY) { + Size_.Inflate(additionX * 2, additionY * 2); + CalcCorners(); + } + + void Inflate(double addition) { + Inflate(addition, addition); + } + + bool operator!() const { + return !IsValid(); + } + + double Diameter() const; + + double Area() const { + return Size_.GetHeight() * Size_.GetWidth(); + } + + double Distance(const TGeoWindow&) const; + + double GetApproxDistance(const TPointLL& point) const; + + /** + * try to parse TGeoWindow from center and span + * return parsed TGeoWindow on success, otherwise throw exception + */ + static TGeoWindow ParseFromLlAndSpn(TStringBuf llStr, TStringBuf spnStr, TStringBuf delimiter = TStringBuf(",")); + + /** + * try to parse TGeoWindow from two corners + * return parsed TGeoWindow on success, otherwise 
throw exception + */ + static TGeoWindow ParseFromCornersPoints(TStringBuf leftCornerStr, TStringBuf rightCornerStr, TStringBuf delimiter = TStringBuf(",")); + + /** + * try to parse TGeoWindow from center and span + * return TMaybe of parsed TGeoWindow on success, otherwise return empty TMaybe + */ + static TMaybe<TGeoWindow> TryParseFromLlAndSpn(TStringBuf llStr, TStringBuf spnStr, TStringBuf delimiter = TStringBuf(",")); + + /** + * try to parse TGeoWindow from two corners + * return TMaybe of parsed TGeoWindow on success, otherwise return empty TMaybe + */ + static TMaybe<TGeoWindow> TryParseFromCornersPoints(TStringBuf leftCornerStr, TStringBuf rightCornerStr, TStringBuf delimiter = TStringBuf(",")); + + private: + TGeoPoint Center_; + TSize Size_; + TGeoPoint LowerLeftCorner_; + TGeoPoint UpperRightCorner_; + + void CalcCorners(); + void CalcCenterAndSpan(); + }; + + inline bool operator==(const TGeoWindow& lhs, const TGeoWindow& rhs) { + return lhs.GetCenter() == rhs.GetCenter() && lhs.GetSize() == rhs.GetSize(); + } + + inline bool operator!=(const TGeoWindow& p1, const TGeoWindow& p2) { + return !(p1 == p2); + } + + /** + * \class TMercatorWindow + * + * Represents a window in EPSG:3395 projection + * (WGS 84 / World Mercator) + */ + class TMercatorWindow { + public: + TMercatorWindow() noexcept; + TMercatorWindow(const TMercatorPoint& center, const TSize& size) noexcept; + TMercatorWindow(const TMercatorPoint& firstPoint, const TMercatorPoint& secondPoint) noexcept; + + const TMercatorPoint& GetCenter() const noexcept { + return Center_; + } + + TSize GetHalfSize() const noexcept { + return {HalfWidth_, HalfHeight_}; + } + + TSize GetSize() const noexcept { + return {GetWidth(), GetHeight()}; + } + + double GetWidth() const noexcept { + return HalfWidth_ * 2; + } + + double GetHeight() const noexcept { + return HalfHeight_ * 2; + } + + TMercatorPoint GetLowerLeftCorner() const noexcept { + return TMercatorPoint{Center_.X() - HalfWidth_, Center_.Y() - 
HalfHeight_}; + } + + TMercatorPoint GetUpperRightCorner() const noexcept { + return TMercatorPoint{Center_.X() + HalfWidth_, Center_.Y() + HalfHeight_}; + } + + bool Contains(const TMercatorPoint& pt) const noexcept; + + bool Contains(const TMercatorWindow& w) const { + return Contains(w.GetLowerLeftCorner()) && Contains(w.GetUpperRightCorner()); + } + + void Stretch(double multiplier) { + HalfWidth_ *= multiplier; + HalfHeight_ *= multiplier; + } + + void Inflate(double additionX, double additionY) { + HalfWidth_ += additionX; + HalfHeight_ += additionY; + } + + void Inflate(double addition) { + Inflate(addition, addition); + } + + double Area() const { + return GetHeight() * GetWidth(); + } + + private: + bool IsDefined() const { + return Center_.IsDefined() && !std::isnan(HalfWidth_) && !std::isnan(HalfHeight_); + } + + private: + TMercatorPoint Center_; + double HalfWidth_; + double HalfHeight_; + }; + + inline bool operator==(const TMercatorWindow& lhs, const TMercatorWindow& rhs) { + return lhs.GetCenter() == rhs.GetCenter() && lhs.GetHalfSize() == rhs.GetHalfSize(); + } + + inline bool operator!=(const TMercatorWindow& p1, const TMercatorWindow& p2) { + return !(p1 == p2); + } + + /** + * Typedefs + * TODO(sobols@): remove + */ + + using TWindowLL = TGeoWindow; + + /** + * Conversion + */ + + TMercatorWindow LLToMercator(const TGeoWindow&); + TGeoWindow MercatorToLL(const TMercatorWindow&); + + /** + * Utility functions + */ + + bool Contains(const TMaybe<TGeoWindow>& window, const TGeoPoint& point); + + TMaybe<TGeoWindow> Union(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs); + TGeoWindow Union(const TGeoWindow& lhs, const TGeoWindow& rhs); + + TMaybe<TGeoWindow> Intersection(const TMaybe<TGeoWindow>& lhs, const TMaybe<TGeoWindow>& rhs); + TMaybe<TGeoWindow> Intersection(const TGeoWindow& lhs, const TGeoWindow& rhs); + + bool Intersects(const TGeoWindow& lhs, const TGeoWindow& rhs); + bool Intersects(const TMaybe<TGeoWindow>& lhs, const 
TMaybe<TGeoWindow>& rhs); +} // namespace NGeo + +template <> +inline void Out<NGeo::TGeoWindow>(IOutputStream& o, const NGeo::TGeoWindow& obj) { + o << '{' << obj.GetCenter() << ", " << obj.GetSize() << ", " << obj.GetLowerLeftCorner() << ", " << obj.GetUpperRightCorner() << "}"; +} diff --git a/library/cpp/geo/ya.make b/library/cpp/geo/ya.make new file mode 100644 index 0000000000..1d36003c5c --- /dev/null +++ b/library/cpp/geo/ya.make @@ -0,0 +1,19 @@ +LIBRARY() + +SRCS( + bbox.cpp + geo.cpp + point.cpp + polygon.cpp + load_save_helper.cpp + size.cpp + util.cpp + window.cpp +) + +END() + +RECURSE_FOR_TESTS( + ut + style + ) diff --git a/library/cpp/geobase/CMakeLists.darwin-x86_64.txt b/library/cpp/geobase/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..b316e54e8a --- /dev/null +++ b/library/cpp/geobase/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,30 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(library-cpp-geobase) +target_link_libraries(library-cpp-geobase PUBLIC + contrib-libs-cxxsupp + yutil + geobase-library + tools-enum_parser-enum_serialization_runtime +) +target_sources(library-cpp-geobase PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/geobase/geobase.cpp +) +generate_enum_serilization(library-cpp-geobase + ${CMAKE_SOURCE_DIR}/geobase/include/structs.hpp + INCLUDE_HEADERS + geobase/include/structs.hpp +) diff --git a/library/cpp/geobase/CMakeLists.linux-aarch64.txt b/library/cpp/geobase/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..ab3962970d --- /dev/null +++ b/library/cpp/geobase/CMakeLists.linux-aarch64.txt @@ -0,0 +1,31 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(library-cpp-geobase) +target_link_libraries(library-cpp-geobase PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + geobase-library + tools-enum_parser-enum_serialization_runtime +) +target_sources(library-cpp-geobase PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/geobase/geobase.cpp +) +generate_enum_serilization(library-cpp-geobase + ${CMAKE_SOURCE_DIR}/geobase/include/structs.hpp + INCLUDE_HEADERS + geobase/include/structs.hpp +) diff --git a/library/cpp/geobase/CMakeLists.linux-x86_64.txt b/library/cpp/geobase/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..ab3962970d --- /dev/null +++ b/library/cpp/geobase/CMakeLists.linux-x86_64.txt @@ -0,0 +1,31 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(library-cpp-geobase) +target_link_libraries(library-cpp-geobase PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + geobase-library + tools-enum_parser-enum_serialization_runtime +) +target_sources(library-cpp-geobase PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/geobase/geobase.cpp +) +generate_enum_serilization(library-cpp-geobase + ${CMAKE_SOURCE_DIR}/geobase/include/structs.hpp + INCLUDE_HEADERS + geobase/include/structs.hpp +) diff --git a/library/cpp/geobase/CMakeLists.txt b/library/cpp/geobase/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/geobase/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/geobase/CMakeLists.windows-x86_64.txt b/library/cpp/geobase/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..b316e54e8a --- /dev/null +++ b/library/cpp/geobase/CMakeLists.windows-x86_64.txt @@ -0,0 +1,30 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(library-cpp-geobase) +target_link_libraries(library-cpp-geobase PUBLIC + contrib-libs-cxxsupp + yutil + geobase-library + tools-enum_parser-enum_serialization_runtime +) +target_sources(library-cpp-geobase PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/geobase/geobase.cpp +) +generate_enum_serilization(library-cpp-geobase + ${CMAKE_SOURCE_DIR}/geobase/include/structs.hpp + INCLUDE_HEADERS + geobase/include/structs.hpp +) diff --git a/library/cpp/geobase/geobase.cpp b/library/cpp/geobase/geobase.cpp new file mode 100644 index 0000000000..24086c67a9 --- /dev/null +++ b/library/cpp/geobase/geobase.cpp @@ -0,0 +1,3 @@ +#include <library/cpp/geobase/lookup.hpp> +#include <library/cpp/geobase/timezone_getter.hpp> +#include <library/cpp/geobase/service_getter.hpp> diff --git a/library/cpp/geobase/lookup.hpp b/library/cpp/geobase/lookup.hpp new file mode 100644 index 0000000000..f663750ab2 --- /dev/null +++ b/library/cpp/geobase/lookup.hpp @@ -0,0 +1,44 @@ +#pragma once + +#include <geobase/include/lookup.hpp> +#include <geobase/include/lookup_wrapper.hpp> +#include <geobase/include/structs.hpp> + +namespace NGeobase { + using TInitTraits = NImpl::TLookup::TInitTraits; + + class TLookup: public NImpl::TLookup { + public: + using parent = NImpl::TLookup; + + explicit TLookup(const std::string& datafile, const TInitTraits traits = {}) + : parent(datafile, traits) + { + } + explicit TLookup(const TInitTraits traits) + : parent(traits) + { + } + explicit TLookup(const void* pData, size_t len) + : parent(pData, len) + { + } + + ~TLookup() { + } + }; + + using TRegion = NImpl::TRegion; + using TGeolocation = NImpl::TGeolocation; + using TLinguistics = NImpl::TLinguistics; + using TGeoPoint = NImpl::TGeoPoint; + + using TLookupWrapper = NImpl::TLookupWrapper; + + using TId = NImpl::Id; + using TIdsList = NImpl::IdsList; + using TRegionsList = 
NImpl::TRegionsList; + + using TIpBasicTraits = NImpl::TIpBasicTraits; + using TIpTraits = NImpl::TIpTraits; +} diff --git a/library/cpp/geobase/service_getter.hpp b/library/cpp/geobase/service_getter.hpp new file mode 100644 index 0000000000..e088081706 --- /dev/null +++ b/library/cpp/geobase/service_getter.hpp @@ -0,0 +1,7 @@ +#pragma once + +#include <geobase/include/service_getter.hpp> + +namespace NGeobase { + using TServiceGetter = NImpl::TServiceGetter; +} diff --git a/library/cpp/geobase/timezone_getter.hpp b/library/cpp/geobase/timezone_getter.hpp new file mode 100644 index 0000000000..5749f1e3d6 --- /dev/null +++ b/library/cpp/geobase/timezone_getter.hpp @@ -0,0 +1,9 @@ +#pragma once + +#include <geobase/include/timezone_getter.hpp> +#include <geobase/include/structs.hpp> + +namespace NGeobase { + using TTimezone = NImpl::TTimezone; + using TTimezoneGetter = NImpl::TTimezoneGetter; +} diff --git a/library/cpp/geobase/ya.make b/library/cpp/geobase/ya.make new file mode 100644 index 0000000000..4a73974903 --- /dev/null +++ b/library/cpp/geobase/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +SRCS( + library/cpp/geobase/geobase.cpp +) + +PEERDIR( + geobase/library +) + +GENERATE_ENUM_SERIALIZATION(geobase/include/structs.hpp) + +END() diff --git a/library/cpp/geohash/CMakeLists.darwin-x86_64.txt b/library/cpp/geohash/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..dfcb278a1f --- /dev/null +++ b/library/cpp/geohash/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,32 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(library-cpp-geohash) +target_link_libraries(library-cpp-geohash PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-geo + tools-enum_parser-enum_serialization_runtime +) +target_sources(library-cpp-geohash PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/geohash/geohash.cpp +) +generate_enum_serilization(library-cpp-geohash + ${CMAKE_SOURCE_DIR}/library/cpp/geohash/direction.h + GEN_HEADER + ${CMAKE_BINARY_DIR}/library/cpp/geohash/direction.h_serialized.h + INCLUDE_HEADERS + library/cpp/geohash/direction.h +) diff --git a/library/cpp/geohash/CMakeLists.linux-aarch64.txt b/library/cpp/geohash/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..a907311df0 --- /dev/null +++ b/library/cpp/geohash/CMakeLists.linux-aarch64.txt @@ -0,0 +1,33 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(library-cpp-geohash) +target_link_libraries(library-cpp-geohash PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-geo + tools-enum_parser-enum_serialization_runtime +) +target_sources(library-cpp-geohash PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/geohash/geohash.cpp +) +generate_enum_serilization(library-cpp-geohash + ${CMAKE_SOURCE_DIR}/library/cpp/geohash/direction.h + GEN_HEADER + ${CMAKE_BINARY_DIR}/library/cpp/geohash/direction.h_serialized.h + INCLUDE_HEADERS + library/cpp/geohash/direction.h +) diff --git a/library/cpp/geohash/CMakeLists.linux-x86_64.txt b/library/cpp/geohash/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..a907311df0 --- /dev/null +++ b/library/cpp/geohash/CMakeLists.linux-x86_64.txt @@ -0,0 +1,33 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(library-cpp-geohash) +target_link_libraries(library-cpp-geohash PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-geo + tools-enum_parser-enum_serialization_runtime +) +target_sources(library-cpp-geohash PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/geohash/geohash.cpp +) +generate_enum_serilization(library-cpp-geohash + ${CMAKE_SOURCE_DIR}/library/cpp/geohash/direction.h + GEN_HEADER + ${CMAKE_BINARY_DIR}/library/cpp/geohash/direction.h_serialized.h + INCLUDE_HEADERS + library/cpp/geohash/direction.h +) diff --git a/library/cpp/geohash/CMakeLists.txt b/library/cpp/geohash/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/geohash/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/geohash/CMakeLists.windows-x86_64.txt b/library/cpp/geohash/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..dfcb278a1f --- /dev/null +++ b/library/cpp/geohash/CMakeLists.windows-x86_64.txt @@ -0,0 +1,32 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(library-cpp-geohash) +target_link_libraries(library-cpp-geohash PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-geo + tools-enum_parser-enum_serialization_runtime +) +target_sources(library-cpp-geohash PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/geohash/geohash.cpp +) +generate_enum_serilization(library-cpp-geohash + ${CMAKE_SOURCE_DIR}/library/cpp/geohash/direction.h + GEN_HEADER + ${CMAKE_BINARY_DIR}/library/cpp/geohash/direction.h_serialized.h + INCLUDE_HEADERS + library/cpp/geohash/direction.h +) diff --git a/library/cpp/geohash/direction.h b/library/cpp/geohash/direction.h new file mode 100644 index 0000000000..88a3e6061d --- /dev/null +++ b/library/cpp/geohash/direction.h @@ -0,0 +1,14 @@ +#pragma once + +namespace NGeoHash { + enum EDirection { + NORTH = 0, + NORTH_EAST, + EAST, + SOUTH_EAST, + SOUTH, + SOUTH_WEST, + WEST, + NORTH_WEST, + }; +} diff --git a/library/cpp/geohash/geohash.cpp b/library/cpp/geohash/geohash.cpp new file mode 100644 index 0000000000..6c6d65acab --- /dev/null +++ b/library/cpp/geohash/geohash.cpp @@ -0,0 +1,413 @@ +#include "geohash.h" + +#include <util/generic/xrange.h> + +namespace { + using TNeighbourDescriptors = NGeoHash::TNeighbours<TMaybe<NGeoHash::TGeoHashDescriptor>>; + const auto directions = GetEnumAllValues<NGeoHash::EDirection>(); + + const auto doubleEps = std::numeric_limits<double>::epsilon(); + + const NGeoHash::TBoundingBoxLL& GetGlobalBBox() { + static const NGeoHash::TBoundingBoxLL globalLimits({-180, -90}, {180, 90}); + return globalLimits; + } + + const TStringBuf base32EncodeTable = "0123456789bcdefghjkmnpqrstuvwxyz"; + + const ui64 base32DecodeMask = 0x1F; + constexpr int base32DecodeTableSize = 128; + + using TBase32DecodeTable = std::array<TMaybe<i8>, base32DecodeTableSize>; + + TBase32DecodeTable MakeBase32DecodeTable() { + TBase32DecodeTable result; + 
result.fill(Nothing()); + for (auto i : xrange(base32EncodeTable.size())) { + result[base32EncodeTable[i]] = i; + } + return result; + } + + const TBase32DecodeTable base32DecodeTable = MakeBase32DecodeTable(); +} + +namespace NGeoHash { + static const ui8 maxSteps = 62; + static const ui8 maxPrecision = TGeoHashDescriptor::StepsToPrecision(maxSteps); // 12 + + static const TNeighbours<std::pair<i8, i8>> neighborBitMoves = { + {1, 0}, // NORTH + {1, 1}, + {0, 1}, + {-1, 1}, + {-1, 0}, + {-1, -1}, + {0, -1}, + {1, -1}, + }; + + ui8 TGeoHashDescriptor::StepsToPrecision(ui8 steps) { + return steps / StepsPerPrecisionUnit; + } + + ui8 TGeoHashDescriptor::PrecisionToSteps(ui8 precision) { + return precision * StepsPerPrecisionUnit; + } + + /* Steps interleave starting from lon so for 5 steps 3 are lon-steps and 2 are lat-steps. + * Thus there are ceil(step/2) lon-steps and floor(step/2) lat-steps */ + std::pair<ui8, ui8> TGeoHashDescriptor::LatLonSteps() const { + return std::make_pair<ui8, ui8>(Steps / 2, (Steps + 1) / 2); + } + + struct TMagicNumber { + ui64 Mask; + ui8 Shift; + }; + + /* Interleave lower bits of x and y, so the bits of x + * are in the even positions and bits from y in the odd. + * e.g. 
Interleave64(0b101, 0b110) => 0b111001 + * From: https://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN + */ + ui64 TGeoHashDescriptor::Interleave64(ui32 x, ui32 y) { + // attention: magic numbers + constexpr TMagicNumber mortonMagicNumbers[] = { + {0x0000FFFF0000FFFF, 16}, + {0x00FF00FF00FF00FF, 8}, + {0x0F0F0F0F0F0F0F0F, 4}, + {0x3333333333333333, 2}, + {0x5555555555555555, 1}}; + + ui64 x64 = x; + ui64 y64 = y; + + for (const auto& magicNumber : mortonMagicNumbers) { + x64 = (x64 | (x64 << magicNumber.Shift)) & magicNumber.Mask; + y64 = (y64 | (y64 << magicNumber.Shift)) & magicNumber.Mask; + } + return x64 | (y64 << 1); + } + + /* Reverse the interleave process + * Deinterleave64(0b111001) => 0b101110 + * derived from http://stackoverflow.com/questions/4909263 */ + std::pair<ui32, ui32> TGeoHashDescriptor::Deinterleave64(ui64 z) { + constexpr TMagicNumber demortonMagicNumbers[] = { + {0x5555555555555555ULL, 0}, + {0x3333333333333333ULL, 1}, + {0x0F0F0F0F0F0F0F0FULL, 2}, + {0x00FF00FF00FF00FFULL, 4}, + {0x0000FFFF0000FFFFULL, 8}, + {0x00000000FFFFFFFFULL, 16}}; + + ui64 x = z; + ui64 y = z >> 1; + + for (const auto& magicNumber : demortonMagicNumbers) { + x = (x | (x >> magicNumber.Shift)) & magicNumber.Mask; + y = (y | (y >> magicNumber.Shift)) & magicNumber.Mask; + } + + return std::make_pair(x, y); + } + + std::pair<ui32, ui32> TGeoHashDescriptor::LatLonBits() const { + auto deinterleaved = Deinterleave64(Bits); + + if (Steps % 2) { + DoSwap(deinterleaved.first, deinterleaved.second); + } + return deinterleaved; + } + + void TGeoHashDescriptor::SetLatLonBits(ui32 latBits, ui32 lonBits) { + if (Steps % 2) { + Bits = Interleave64(lonBits, latBits); + } else { + Bits = Interleave64(latBits, lonBits); + } + } + + void TGeoHashDescriptor::InitFromLatLon(double latitude, double longitude, const TBoundingBoxLL& limits, ui8 steps) { + Steps = steps; + if (Steps > maxSteps) { + ythrow yexception() << "Invalid steps: available values: 0.." 
<< ::ToString(maxSteps); + } + + if (limits.Width() < doubleEps || limits.Height() < doubleEps) { + ythrow yexception() << "Invalid limits: min/max for one of coordinates are equal"; + } + + if (latitude < limits.GetMinY() || latitude > limits.GetMaxY() || longitude < limits.GetMinX() || longitude > limits.GetMaxX()) { + ythrow yexception() << "Invalid point (" << latitude << ", " << longitude << "): outside of limits"; + } + + double lat01 = (latitude - limits.GetMinY()) / limits.Height(); + double lon01 = (longitude - limits.GetMinX()) / limits.Width(); + + auto llSteps = LatLonSteps(); + + /* convert to fixed point based on the step size */ + lat01 *= (1 << llSteps.first); + lon01 *= (1 << llSteps.second); + + /* If lon_steps > lat_step, last bit is lon-bit, otherwise last bit is lat-bit*/ + SetLatLonBits(lat01, lon01); + } + + TGeoHashDescriptor::TGeoHashDescriptor(double latitude, double longitude, const TBoundingBoxLL& limits, ui8 steps) { + InitFromLatLon(latitude, longitude, limits, steps); + } + + TGeoHashDescriptor::TGeoHashDescriptor(double latitude, double longitude, ui8 steps) { + InitFromLatLon(latitude, longitude, GetGlobalBBox(), steps); + } + + TGeoHashDescriptor::TGeoHashDescriptor(const NGeo::TPointLL& point, const TBoundingBoxLL& limits, ui8 steps) { + InitFromLatLon(point.Lat(), point.Lon(), limits, steps); + } + + TGeoHashDescriptor::TGeoHashDescriptor(const NGeo::TPointLL& point, ui8 steps) { + InitFromLatLon(point.Lat(), point.Lon(), GetGlobalBBox(), steps); + } + + TGeoHashDescriptor::TGeoHashDescriptor(const TString& hashString) { + if (hashString.size() > maxPrecision) { + ythrow yexception() << "hashString is too long: max length is " << ::ToString(maxPrecision); + } + + Bits = 0; + for (auto c : hashString) { + Bits <<= StepsPerPrecisionUnit; + Y_ENSURE(c >= 0); + const auto decodedChar = base32DecodeTable[c]; + Y_ENSURE(decodedChar.Defined()); + Bits |= decodedChar.GetRef(); + } + + Steps = PrecisionToSteps(hashString.size()); + } + + 
ui64 TGeoHashDescriptor::GetBits() const { + return Bits; + } + + ui8 TGeoHashDescriptor::GetSteps() const { + return Steps; + } + + TString TGeoHashDescriptor::ToString() const { + auto precision = StepsToPrecision(Steps); + + TStringStream stream; + + auto bits = Bits; + auto activeSteps = PrecisionToSteps(precision); + + bits >>= (Steps - activeSteps); + for (auto i : xrange(precision)) { + auto ix = (bits >> (StepsPerPrecisionUnit * ((precision - i - 1)))) & base32DecodeMask; + stream << base32EncodeTable[ix]; + } + + return stream.Str(); + } + + TBoundingBoxLL TGeoHashDescriptor::ToBoundingBox(const TBoundingBoxLL& limits) const { + auto llBits = LatLonBits(); + auto llSteps = LatLonSteps(); + + double latMultiplier = limits.Height() / (1ull << llSteps.first); + double lonMultiplier = limits.Width() / (1ull << llSteps.second); + + return { + { + limits.GetMinX() + lonMultiplier * llBits.second, + limits.GetMinY() + latMultiplier * llBits.first, + }, + { + limits.GetMinX() + lonMultiplier * (llBits.second + 1), + limits.GetMinY() + latMultiplier * (llBits.first + 1), + }}; + } + + TBoundingBoxLL TGeoHashDescriptor::ToBoundingBox() const { + return ToBoundingBox(GetGlobalBBox()); + } + + NGeo::TPointLL TGeoHashDescriptor::ToPoint(const TBoundingBoxLL& limits) const { + auto boundingBox = ToBoundingBox(limits); + return { + boundingBox.GetMinX() + boundingBox.Width() / 2, + boundingBox.GetMinY() + boundingBox.Height() / 2}; + } + + NGeo::TPointLL TGeoHashDescriptor::ToPoint() const { + return ToPoint(GetGlobalBBox()); + } + + TMaybe<TGeoHashDescriptor> TGeoHashDescriptor::GetNeighbour(EDirection direction) const { + TGeoHashDescriptor result(0, Steps); + auto llBits = LatLonBits(); + auto llSteps = LatLonSteps(); + std::pair<i8, i8> bitMove = neighborBitMoves[direction]; + + auto newLatBits = llBits.first + bitMove.first; + auto newLonBits = llBits.second + bitMove.second; + + // Overflow in lat means polar, so return Nothing + if (newLatBits >> llSteps.first != 
0) { + return Nothing(); + } + + // Overflow in lon means 180-meridian, so just remove overflowed bits + newLonBits &= ((1 << llSteps.second) - 1); + result.SetLatLonBits(newLatBits, newLonBits); + return result; + } + + TNeighbourDescriptors TGeoHashDescriptor::GetNeighbours() const { + TNeighbourDescriptors result; + auto llBits = LatLonBits(); + auto llSteps = LatLonSteps(); + std::pair<i8, i8> bitMove; + + for (auto direction : directions) { + bitMove = neighborBitMoves[direction]; + + auto newLatBits = llBits.first + bitMove.first; + auto newLonBits = llBits.second + bitMove.second; + + // Overflow in lat means polar, so put Nothing + if (newLatBits >> llSteps.first != 0) { + result[direction] = Nothing(); + } else { + result[direction] = TGeoHashDescriptor(0, Steps); + // Overflow in lon means 180-meridian, so just remove overflowed bits + newLonBits &= ((1 << llSteps.second) - 1); + result[direction]->SetLatLonBits(newLatBits, newLonBits); + } + } + + return result; + } + + TVector<TGeoHashDescriptor> TGeoHashDescriptor::GetChildren(ui8 steps = StepsPerPrecisionUnit) const { + TVector<TGeoHashDescriptor> children(Reserve(1 << steps)); + ui8 childrenSteps = steps + Steps; + auto parentBits = Bits << steps; + if (childrenSteps > maxSteps) { + ythrow yexception() << "Resulting geohash steps are too big, available values: 0.." 
<< ::ToString(maxSteps); + } + for (auto residue : xrange(1 << steps)) { + children.emplace_back(parentBits | residue, childrenSteps); + } + return children; + } + + /* Functions */ + + ui64 Encode(double latitude, double longitude, ui8 precision) { + auto descr = TGeoHashDescriptor( + latitude, longitude, TGeoHashDescriptor::PrecisionToSteps(precision)); + return descr.GetBits(); + } + ui64 Encode(const NGeo::TPointLL& point, ui8 precision) { + return TGeoHashDescriptor( + point, TGeoHashDescriptor::PrecisionToSteps(precision)) + .GetBits(); + } + + TString EncodeToString(double latitude, double longitude, ui8 precision) { + return TGeoHashDescriptor( + latitude, longitude, TGeoHashDescriptor::PrecisionToSteps(precision)) + .ToString(); + } + TString EncodeToString(const NGeo::TPointLL& point, ui8 precision) { + return TGeoHashDescriptor( + point, TGeoHashDescriptor::PrecisionToSteps(precision)) + .ToString(); + } + + NGeo::TPointLL DecodeToPoint(const TString& hashString) { + return TGeoHashDescriptor(hashString).ToPoint(); + } + NGeo::TPointLL DecodeToPoint(ui64 hash, ui8 precision) { + return TGeoHashDescriptor(hash, TGeoHashDescriptor::PrecisionToSteps(precision)).ToPoint(); + } + + TBoundingBoxLL DecodeToBoundingBox(const TString& hashString) { + return TGeoHashDescriptor(hashString).ToBoundingBox(); + } + + TBoundingBoxLL DecodeToBoundingBox(ui64 hash, ui8 precision) { + return TGeoHashDescriptor(hash, TGeoHashDescriptor::PrecisionToSteps(precision)).ToBoundingBox(); + } + + TMaybe<ui64> GetNeighbour(ui64 hash, EDirection direction, ui8 precision) { + auto neighbour = TGeoHashDescriptor( + hash, TGeoHashDescriptor::PrecisionToSteps(precision)) + .GetNeighbour(direction); + + if (neighbour.Defined()) { + return neighbour->GetBits(); + } else { + return Nothing(); + } + } + + TMaybe<TString> GetNeighbour(const TString& hashString, EDirection direction) { + auto neighbour = TGeoHashDescriptor(hashString).GetNeighbour(direction); + if (neighbour.Defined()) { + 
return neighbour->ToString(); + } else { + return Nothing(); + } + } + + TGeoHashBitsNeighbours GetNeighbours(ui64 hash, ui8 precision) { + TGeoHashBitsNeighbours result; + + auto neighbours = TGeoHashDescriptor( + hash, TGeoHashDescriptor::PrecisionToSteps(precision)) + .GetNeighbours(); + + for (auto direction : directions) { + if (neighbours[direction].Defined()) { + result[direction] = neighbours[direction]->GetBits(); + } else { + result[direction] = Nothing(); + } + } + + return result; + } + + TGeoHashStringNeighbours GetNeighbours(const TString& hashString) { + TGeoHashStringNeighbours result; + + auto neighbours = TGeoHashDescriptor( + hashString) + .GetNeighbours(); + + for (auto direction : directions) { + if (neighbours[direction].Defined()) { + result[direction] = neighbours[direction]->ToString(); + } else { + result[direction] = Nothing(); + } + } + return result; + } + + TVector<TString> GetChildren(const TString& hashString) { + TVector<TString> result(Reserve(base32EncodeTable.size())); + + for (auto ch : base32EncodeTable) { + result.push_back(hashString + ch); + } + return result; + } +} diff --git a/library/cpp/geohash/geohash.h b/library/cpp/geohash/geohash.h new file mode 100644 index 0000000000..7d270612e8 --- /dev/null +++ b/library/cpp/geohash/geohash.h @@ -0,0 +1,123 @@ +#pragma once + +/** + * @file + * @brief Strong (because it works) and independent (of contrib/libs/geohash) GeoHash implementation + * GeoHash algo: https://en.wikipedia.org/wiki/Geohash + * Useful links: + * 1. http://geohash.org - Main Site + * 2. https://dou.ua/lenta/articles/geohash - Geohash-based geopoints clusterization + * 3. 
http://www.movable-type.co.uk/scripts/geohash.html - bidirectional encoding and visualization + */ +#include <library/cpp/geohash/direction.h> +#include <library/cpp/geohash/direction.h_serialized.h> + +#include <library/cpp/geo/geo.h> + +#include <util/generic/maybe.h> +#include <util/generic/string.h> +#include <util/system/types.h> + +#include <array> + +namespace NGeoHash { + using TBoundingBoxLL = NGeo::TGeoBoundingBox; + static constexpr auto directionsCount = GetEnumItemsCount<EDirection>(); + + template <class T> + class TNeighbours: public std::array<T, directionsCount> { + public: + TNeighbours() = default; + + TNeighbours(std::initializer_list<T> list) { + Y_ASSERT(list.size() == directionsCount); + std::copy(list.begin(), list.end(), std::array<T, directionsCount>::begin()); + } + + const T& operator[](EDirection direction) const { + return std::array<T, directionsCount>::operator[](static_cast<size_t>(direction)); + } + + T& operator[](EDirection direction) { + return std::array<T, directionsCount>::operator[](static_cast<size_t>(direction)); + } + }; + + class TGeoHashDescriptor { + public: + TGeoHashDescriptor() noexcept + : Bits(0) + , Steps(0) + { + } + + TGeoHashDescriptor(ui64 bits, ui8 steps) noexcept + : Bits(bits) + , Steps(steps) + { + } + + TGeoHashDescriptor(double latitude, double longitude, ui8 steps); + TGeoHashDescriptor(double latitude, double longitude, const TBoundingBoxLL& limits, ui8 steps); + TGeoHashDescriptor(const NGeo::TPointLL& point, ui8 steps); + TGeoHashDescriptor(const NGeo::TPointLL& point, const TBoundingBoxLL& limits, ui8 steps); + + explicit TGeoHashDescriptor(const TString& hashString); + + ui64 GetBits() const; + ui8 GetSteps() const; + + TString ToString() const; + + NGeo::TPointLL ToPoint(const TBoundingBoxLL& limits) const; + NGeo::TPointLL ToPoint() const; + + TBoundingBoxLL ToBoundingBox(const TBoundingBoxLL& limits) const; + TBoundingBoxLL ToBoundingBox() const; + + TMaybe<TGeoHashDescriptor> 
GetNeighbour(EDirection direction) const; + TNeighbours<TMaybe<TGeoHashDescriptor>> GetNeighbours() const; + + TVector<TGeoHashDescriptor> GetChildren(ui8 steps) const; + + static ui8 StepsToPrecision(ui8 steps); + static ui8 PrecisionToSteps(ui8 precision); + + private: + void InitFromLatLon(double latitude, double longitude, const TBoundingBoxLL& limits, ui8 steps); + std::pair<ui8, ui8> LatLonSteps() const; + std::pair<ui32, ui32> LatLonBits() const; + void SetLatLonBits(ui32 latBits, ui32 lonBits); + static ui64 Interleave64(ui32 x, ui32 y); + static std::pair<ui32, ui32> Deinterleave64(ui64 interleaved); + + private: + static const ui8 StepsPerPrecisionUnit = 5; + ui64 Bits; + ui8 Steps; + }; + + ui64 Encode(double latitude, double longitude, ui8 precision); + ui64 Encode(const NGeo::TPointLL& point, ui8 precision); + + TString EncodeToString(double latitude, double longitude, ui8 precision); + TString EncodeToString(const NGeo::TPointLL& point, ui8 precision); + + NGeo::TPointLL DecodeToPoint(const TString& hashString); + NGeo::TPointLL DecodeToPoint(ui64 hash, ui8 precision); + + TBoundingBoxLL DecodeToBoundingBox(const TString& hashString); + TBoundingBoxLL DecodeToBoundingBox(ui64 hash, ui8 precision); + + TMaybe<ui64> GetNeighbour(ui64 hash, EDirection direction, ui8 precision); + TMaybe<TString> GetNeighbour(const TString& hashString, EDirection direction); + + using TGeoHashBitsNeighbours = TNeighbours<TMaybe<ui64>>; + using TGeoHashStringNeighbours = TNeighbours<TMaybe<TString>>; + + TGeoHashBitsNeighbours GetNeighbours(ui64 hash, ui8 precision); + TGeoHashStringNeighbours GetNeighbours(const TString& hashString); + + TVector<TString> GetChildren(const TString& hashString); + +} /* namespace NGeoHash */ diff --git a/library/cpp/geohash/ya.make b/library/cpp/geohash/ya.make new file mode 100644 index 0000000000..3350ca1cc6 --- /dev/null +++ b/library/cpp/geohash/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +PEERDIR( + library/cpp/geo +) + +SRCS( + geohash.cpp 
+) + +GENERATE_ENUM_SERIALIZATION_WITH_HEADER(direction.h) + +END() diff --git a/library/cpp/ipreg/CMakeLists.darwin-x86_64.txt b/library/cpp/ipreg/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..05b000b7da --- /dev/null +++ b/library/cpp/ipreg/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,53 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(library-cpp-ipreg) +target_link_libraries(library-cpp-ipreg PUBLIC + contrib-libs-cxxsupp + yutil + cpp-getopt-small + library-cpp-json + library-cpp-geobase + library-cpp-int128 + tools-enum_parser-enum_serialization_runtime +) +target_sources(library-cpp-ipreg PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/checker.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/merge.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/range.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/reader.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/split.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/stopwatch.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/writer.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/util_helpers.cpp +) +generate_enum_serilization(library-cpp-ipreg + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.h + INCLUDE_HEADERS + library/cpp/ipreg/address.h +) +generate_enum_serilization(library-cpp-ipreg + 
${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.h + INCLUDE_HEADERS + library/cpp/ipreg/sources.h +) diff --git a/library/cpp/ipreg/CMakeLists.linux-aarch64.txt b/library/cpp/ipreg/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..5e76739840 --- /dev/null +++ b/library/cpp/ipreg/CMakeLists.linux-aarch64.txt @@ -0,0 +1,54 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(library-cpp-ipreg) +target_link_libraries(library-cpp-ipreg PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-getopt-small + library-cpp-json + library-cpp-geobase + library-cpp-int128 + tools-enum_parser-enum_serialization_runtime +) +target_sources(library-cpp-ipreg PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/checker.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/merge.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/range.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/reader.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/split.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/stopwatch.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/writer.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/util_helpers.cpp +) +generate_enum_serilization(library-cpp-ipreg + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.h + INCLUDE_HEADERS + library/cpp/ipreg/address.h +) 
+generate_enum_serilization(library-cpp-ipreg + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.h + INCLUDE_HEADERS + library/cpp/ipreg/sources.h +) diff --git a/library/cpp/ipreg/CMakeLists.linux-x86_64.txt b/library/cpp/ipreg/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..5e76739840 --- /dev/null +++ b/library/cpp/ipreg/CMakeLists.linux-x86_64.txt @@ -0,0 +1,54 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(library-cpp-ipreg) +target_link_libraries(library-cpp-ipreg PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-getopt-small + library-cpp-json + library-cpp-geobase + library-cpp-int128 + tools-enum_parser-enum_serialization_runtime +) +target_sources(library-cpp-ipreg PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/checker.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/merge.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/range.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/reader.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/split.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/stopwatch.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/writer.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/util_helpers.cpp +) +generate_enum_serilization(library-cpp-ipreg + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.h + 
INCLUDE_HEADERS + library/cpp/ipreg/address.h +) +generate_enum_serilization(library-cpp-ipreg + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.h + INCLUDE_HEADERS + library/cpp/ipreg/sources.h +) diff --git a/library/cpp/ipreg/CMakeLists.txt b/library/cpp/ipreg/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/ipreg/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/ipreg/CMakeLists.windows-x86_64.txt b/library/cpp/ipreg/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..05b000b7da --- /dev/null +++ b/library/cpp/ipreg/CMakeLists.windows-x86_64.txt @@ -0,0 +1,53 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. 
Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(library-cpp-ipreg) +target_link_libraries(library-cpp-ipreg PUBLIC + contrib-libs-cxxsupp + yutil + cpp-getopt-small + library-cpp-json + library-cpp-geobase + library-cpp-int128 + tools-enum_parser-enum_serialization_runtime +) +target_sources(library-cpp-ipreg PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/checker.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/merge.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/range.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/reader.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/split.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/stopwatch.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/writer.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/util_helpers.cpp +) +generate_enum_serilization(library-cpp-ipreg + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/address.h + INCLUDE_HEADERS + library/cpp/ipreg/address.h +) +generate_enum_serilization(library-cpp-ipreg + ${CMAKE_SOURCE_DIR}/library/cpp/ipreg/sources.h + INCLUDE_HEADERS + library/cpp/ipreg/sources.h +) diff --git a/library/cpp/ipreg/address.cpp b/library/cpp/ipreg/address.cpp new file mode 100644 index 0000000000..83880ccbae --- /dev/null +++ b/library/cpp/ipreg/address.cpp @@ -0,0 +1,365 @@ +#include "address.h" + +#include <util/generic/mem_copy.h> +#include <util/stream/format.h> +#include <util/string/cast.h> +#include <util/string/hex.h> +#include <util/string/printf.h> +#include <util/string/split.h> +#include <util/string/type.h> +#include <util/string/vector.h> +#include <util/system/byteorder.h> +#include 
<util/network/socket.h> + +#include <sstream> + +namespace NIPREG { + +TAddress TAddress::ParseAny(TStringBuf str) { + if (str.find(':') != TStringBuf::npos) { + return ParseIPv6(str); + } else if (str.find('.') != TStringBuf::npos) { + return ParseIPv4(str); + } else if (IsNumber(str)) { + return ParseIPv4Num(str); // TODO(dieash@) IPv6Num + } + + ythrow yexception() << "Unrecognized IPREG address format: " << str; +} + +TAddress TAddress::ParseIPv6(TStringBuf str) { + TAddress addr; + if (inet_pton(AF_INET6, TString(str).c_str(), &addr.Data) != 1) + ythrow yexception() << "Failed to parse IPREG address " << str << " as IPv6"; + + return addr; +} + +TAddress TAddress::ParseIPv4(TStringBuf str) { + struct in_addr ipv4; + if (inet_aton(TString(str).c_str(), &ipv4) != 1) + ythrow yexception() << "Failed to parse IPREG address " << str << " as IPv4"; + + return FromIPv4Num(InetToHost(ipv4.s_addr)); +} + +TAddress TAddress::ParseIPv4Num(TStringBuf str) { + return FromIPv4Num(FromString<ui32>(str)); +} + +TAddress TAddress::ParseIPv6Num(TStringBuf str) { + return FromUint128(FromString<ui128>(str)); +} + +TAddress TAddress::FromBinary(unsigned char const * const data) { + TAddress addr; + MemCopy<unsigned char>(addr.Data, data, sizeof(addr.Data)); + return addr; +} + +TAddress TAddress::FromBinaryIPv4(unsigned char const * const data) { + return TAddress::FromIPv4Num( + (static_cast<ui32>(data[0]) << 24) | + (static_cast<ui32>(data[1]) << 16) | + (static_cast<ui32>(data[2]) << 8) | + (static_cast<ui32>(data[3])) + ); +} + +TAddress TAddress::FromIPv4Num(ui32 num) { + TAddress addr; + memset((void*)&addr.Data, 0x00, 10); + addr.Data[10] = 0xff; + addr.Data[11] = 0xff; + addr.Data[12] = (num >> 24) & 0xff; + addr.Data[13] = (num >> 16) & 0xff; + addr.Data[14] = (num >> 8) & 0xff; + addr.Data[15] = (num) & 0xff; + return addr; +} + +TAddress TAddress::FromUint128(ui128 intAddr) { + const auto hiBE = HostToInet(GetHigh(intAddr)); + const auto loBE = 
HostToInet(GetLow(intAddr)); + + TAddress addr; + ui64* dataPtr = reinterpret_cast<ui64*>(addr.Data); + MemCopy<ui64>(dataPtr, &hiBE, 1); + MemCopy<ui64>(dataPtr + 1, &loBE, 1); + + return addr; +} + +namespace { + void SetHostsBits(TAddress& addr, char value) { + addr.Data[ 8] = value; + addr.Data[ 9] = value; + addr.Data[10] = value; + addr.Data[11] = value; + addr.Data[12] = value; + addr.Data[13] = value; + addr.Data[14] = value; + addr.Data[15] = value; + } +} // anon-ns + +TAddress TAddress::MakeNet64Broadcast(TAddress base) { + SetHostsBits(base, 0xff); + return base; +} + +TAddress TAddress::MakeNet64Prefix(TAddress base) { + SetHostsBits(base, 0x00); + return base; +} + +const TAddress& TAddress::Lowest() { + static const TAddress first{{}}; + return first; +} + +const TAddress& TAddress::Highest() { + static const TAddress last{{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; + return last; +} + +TString TAddress::AsIPv4() const { + return ToString(Data[12]) + "." + ToString(Data[13]) + "." + ToString(Data[14]) + "." 
+ ToString(Data[15]); +} + +TString TAddress::AsIPv4Num() const { + ui32 addr = (ui32)Data[12] << 24 | (ui32)Data[13] << 16 | (ui32)Data[14] << 8 | Data[15]; + return ToString(addr); +} + +TString TAddress::AsIPv6() const { + TStringStream ss; + + for (size_t octet = 0; octet < sizeof(Data); octet++) { + ss << Hex(Data[octet], HF_FULL); + if (octet < 15 && octet & 1) + ss << ':'; + } + + TString s = ss.Str(); + s.to_lower(); + + return s; +} + +TString TAddress::AsIPv6Num() const { + return ToString(AsUint128()); +} + +TString TAddress::GetTextFromNetOrder() const { + char buf[INET6_ADDRSTRLEN]; + if (inet_ntop(AF_INET6, (void*)(&Data), buf, sizeof(buf)) == NULL) + ythrow yexception() << "Failed to stringify IPREG address"; + + return buf; +} + +namespace { + TString GetHexStr(ui64 v) { + return HexEncode(reinterpret_cast<const char*>(&v), sizeof(v)); + } + + void HexDumpToStream(std::stringstream& ss, ui64 beData) { + const auto dataHexStr = GetHexStr(beData); + const auto hostData = InetToHost(beData); + const auto hostDataStr = GetHexStr(hostData); + ss << "\t/big-end[" << beData << " / " << dataHexStr << "]\t/host[" << hostData << " / " << hostDataStr << "]\n"; + } +} // anon-ns + +TString TAddress::GetHexString(const bool deepView) const { + std::stringstream ss; + ss << HexEncode(TStringBuf(reinterpret_cast<const char*>(Data), 16)); + if (deepView) { + const ui64* dataPtr = reinterpret_cast<const ui64*>(Data); + + const auto hi = *dataPtr; + ss << "\nhigh-data"; HexDumpToStream(ss, hi); + + const auto lo = *(dataPtr + 1); + ss << "\nlow-data"; HexDumpToStream(ss, lo); + } + return ss.str().c_str(); +} + +TString TAddress::AsShortIP() const { + if (IsIPv4()) + return AsIPv4(); + else + return GetTextFromNetOrder(); +} + +TString TAddress::AsShortIPv6() const { + if (IsIPv4()) + return Sprintf("::ffff:%x:%x", (ui32)Data[12] << 8 | (ui32)Data[13], (ui32)Data[14] << 8 | (ui32)Data[15]); + else + return GetTextFromNetOrder(); +} + +TString TAddress::AsLongIP() 
const { + if (IsIPv4()) + return AsIPv4(); + else + return AsIPv6(); +} + +ui128 TAddress::AsUint128() const { + const ui64* dataPtr = reinterpret_cast<const ui64*>(Data); + return ui128(InetToHost(*dataPtr), InetToHost(*(dataPtr + 1))); +} + +ui64 TAddress::GetHigh64() const { + const ui64* dataPtr = reinterpret_cast<const ui64*>(Data); + return *dataPtr; +} + +ui64 TAddress::GetLow64() const { + const ui64* dataPtr = reinterpret_cast<const ui64*>(Data); + return *(dataPtr + 1); +} + +ui64 TAddress::GetHigh64LE() const { + return InetToHost(GetHigh64()); +} + +ui64 TAddress::GetLow64LE() const { + return InetToHost(GetLow64()); +} + +bool TAddress::IsNet64Broadcast() const { + static const auto NET64_HOSTS_MASK = TAddress::ParseAny("::ffff:ffff:ffff:ffff").GetLow64(); + const auto ownHostsBits = GetLow64(); + return ownHostsBits == NET64_HOSTS_MASK; +} + +bool TAddress::IsNet64Host() const { + const auto isSomeOwnHostsBitsOn = GetLow64() > 0; + return isSomeOwnHostsBitsOn && !IsNet64Broadcast(); +} + +TString TAddress::Format(EAddressFormat format) const { + switch (format) { + case EAddressFormat::IPV6: + return AsIPv6(); + case EAddressFormat::LONG_IP: + return AsLongIP(); + case EAddressFormat::SHORT_IP: + return AsShortIP(); + case EAddressFormat::NUMERIC_IPV4: + return AsIPv4Num(); + case EAddressFormat::NUMERIC_IPV6: + return AsIPv6Num(); + case EAddressFormat::NTOA: + return GetTextFromNetOrder(); + case EAddressFormat::SHORT_IPV6: + return AsShortIPv6(); + } + // not reached for a valid enumerator; guards against out-of-range values cast to EAddressFormat + ythrow yexception() << "unknown address format: " << static_cast<int>(format); +} + +bool TAddress::IsIPv4() const { + static const unsigned char mask[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff }; + return memcmp(Data, mask, sizeof(mask)) == 0; +} + +TAddress TAddress::Next() const { + if (Highest() == *this) { + return Highest(); + } + + TAddress addr; + bool carry = 1; + for (ssize_t octet = 15; octet >= 0; octet--) { + addr.Data[octet] = Data[octet] + carry; + carry = carry && !addr.Data[octet]; + } + + return addr; +} + +TAddress TAddress::Prev() const { + if (Lowest() == 
*this) { + return Lowest(); + } + + TAddress addr{}; + bool carry = 1; + for (ssize_t octet = 15; octet >= 0; octet--) { + addr.Data[octet] = Data[octet] - carry; + carry = carry && !Data[octet]; + } + + return addr; +} + +double TAddress::operator-(const TAddress& rhs) const { + double diff = 0.0; + for (ssize_t octet = 0; octet < 16; octet++) { + diff = diff * 256.0 + (static_cast<int>(Data[octet]) - static_cast<int>(rhs.Data[octet])); + } + return diff; +} + +ui128 TAddress::Distance(const TAddress& a, const TAddress& b) { + const auto& intA = a.AsUint128(); + const auto& intB = b.AsUint128(); + return (a > b) ? (intA - intB) : (intB - intA); +} + +namespace { + constexpr size_t MAX_IPV6_MASK_LEN = 16 * 8; + constexpr size_t MAX_IPV4_MASK_LEN = 4 * 8; + constexpr size_t IPV4_IN6_MASK_BASE = MAX_IPV6_MASK_LEN - MAX_IPV4_MASK_LEN; + + TAddress SetMaskBits(const TAddress& addr, const size_t wantedMaskLen) { + auto maskLen = wantedMaskLen; + if (addr.IsIPv4() && maskLen && maskLen <= MAX_IPV4_MASK_LEN) { + maskLen += IPV4_IN6_MASK_BASE; + } + + if (maskLen == 0 || maskLen > MAX_IPV6_MASK_LEN || (addr.IsIPv4() && maskLen < IPV4_IN6_MASK_BASE)) { + ythrow yexception() << "strange mask (calc/wanted) " << maskLen << "/" << wantedMaskLen << "; " << addr; + } + + const int octetsForUpdate = (MAX_IPV6_MASK_LEN - maskLen) / 8; + const int bitsForUpdate = (MAX_IPV6_MASK_LEN - maskLen) % 8; + + size_t currOctet = 15; + TAddress addrWithMask = addr; + + for (int octetNum = 0; octetNum != octetsForUpdate; ++octetNum) { + addrWithMask.Data[currOctet--] = 0xff; + } + + // set (not toggle) the remaining host bits, matching the whole-octet loop above, so a base address with host bits already on still gets them all raised + for (int bitNum = 0; bitNum != bitsForUpdate; ++bitNum) { + addrWithMask.Data[currOctet] |= 1 << bitNum; + } + + return addrWithMask; + } +} // anon-ns + +TNetwork::TNetwork(const TString& str) + : TNetwork(static_cast<TVector<TString>>(StringSplitter(str).Split('/').SkipEmpty())) +{} + +TNetwork::TNetwork(const TVector<TString>& data) + : TNetwork(data.size() ? data[0] : "", + data.size() > 1 ? 
FromStringWithDefault<size_t>(data[1]) : 0) +{} + +TNetwork::TNetwork(const TString& net, size_t maskLen) + : begin(TAddress::ParseAny(net)) + , end(SetMaskBits(begin, maskLen)) +{} + +} + +IOutputStream& operator<<(IOutputStream& output, const NIPREG::TAddress& addr) { + output << addr.AsShortIPv6(); + return output; +} diff --git a/library/cpp/ipreg/address.h b/library/cpp/ipreg/address.h new file mode 100644 index 0000000000..9071418d5b --- /dev/null +++ b/library/cpp/ipreg/address.h @@ -0,0 +1,137 @@ +#pragma once + +#include <library/cpp/int128/int128.h> + +#include <util/generic/string.h> +#include <util/digest/murmur.h> +#include <util/string/cast.h> + +namespace NIPREG { + +struct TAddress { + enum class EAddressFormat { + IPV6 = 0x00 /* "ipv6" */, + LONG_IP = 0x01 /* "long" */, + SHORT_IP = 0x02 /* "short" */, + NUMERIC_IPV4 = 0x03 /* "num4" */, + NTOA = 0x04 /* "n2a" */, + SHORT_IPV6 = 0x05 /* "short-ipv6" */, + NUMERIC_IPV6 = 0x06 /* "num" */, + }; + + unsigned char Data[16] = {0}; // NOTA BENE: network byte order (Big-Endian) + + // Comparison + bool operator==(const TAddress& other) const { + return memcmp(Data, other.Data, sizeof(Data)) == 0; + } + + bool operator<(const TAddress& other) const { + return memcmp(Data, other.Data, sizeof(Data)) < 0; + } + + bool operator>(const TAddress& other) const { + return memcmp(Data, other.Data, sizeof(Data)) > 0; + } + + bool operator!=(const TAddress& other) const { + return !(*this == other); + } + + bool operator<=(const TAddress& other) const { + return !(*this > other); + } + + bool operator>=(const TAddress& other) const { + return !(*this < other); + } + + double operator-(const TAddress& rhs) const; + + // Parsing + static TAddress ParseAny(TStringBuf str); + + static TAddress ParseIPv6(TStringBuf str); + static TAddress ParseIPv4(TStringBuf str); + static TAddress ParseIPv4Num(TStringBuf str); + static TAddress ParseIPv6Num(TStringBuf str); + + static TAddress FromIPv4Num(ui32 num); + static TAddress 
FromUint128(ui128 addr); + static TAddress FromBinary(unsigned char const * data); + static TAddress FromBinaryIPv4(unsigned char const * const data); + + static TAddress MakeNet64Broadcast(TAddress base); + static TAddress MakeNet64Prefix(TAddress base); + + static const TAddress& Lowest(); + static const TAddress& Highest(); + + // Inspecting + TString AsIPv4() const; + TString AsIPv4Num() const; + TString AsIPv6() const; + TString AsIPv6Num() const; + TString GetTextFromNetOrder() const; + TString GetHexString(bool deepView = false) const; + + TString AsShortIP() const; + TString AsShortIPv6() const; + TString AsLongIP() const; + + ui128 AsUint128() const; + ui64 GetHigh64() const; + ui64 GetLow64() const; + ui64 GetHigh64LE() const; + ui64 GetLow64LE() const; + + bool IsNet64Broadcast() const; + bool IsNet64Host() const; + + TAddress GetNet64() const { + return TAddress::FromUint128(ui128{GetHigh64LE()} << 64); + } + + TAddress GetPrevNet64() const { + return TAddress::FromUint128(ui128{GetHigh64LE() - 1} << 64); + } + + TAddress GetNextNet64() const { + return TAddress::FromUint128(ui128{GetHigh64LE() + 1} << 64); + } + + TString Format(EAddressFormat format) const; + + int GetType() const { return IsIPv4() ? 
4 : 6; } + bool IsIPv4() const; + + // Mutating + TAddress Next() const; + TAddress Prev() const; + + static ui128 Distance(const TAddress& a, const TAddress& b); +}; + +using EAddressFormat = TAddress::EAddressFormat; + +struct TNetwork { + TAddress begin; + TAddress end; + + TNetwork(const TString& str = "0.0.0.0/32"); + +private: + TNetwork(const TVector<TString>& data); + TNetwork(const TString& net, size_t mask); +}; + +} // NIPREG + +template <> +struct THash<NIPREG::TAddress> { + inline size_t operator()(const NIPREG::TAddress& address) const { + return MurmurHash<size_t>((const void*)address.Data, 16); + } +}; + +IOutputStream& operator<<(IOutputStream& output, const NIPREG::TAddress& addr); diff --git a/library/cpp/ipreg/checker.cpp b/library/cpp/ipreg/checker.cpp new file mode 100644 index 0000000000..9c41d27dc0 --- /dev/null +++ b/library/cpp/ipreg/checker.cpp @@ -0,0 +1,47 @@ +#include "checker.h" + +namespace NIPREG { + +void TChecker::CheckNextFatal(const TAddress& first, const TAddress& last) { + if (!CheckNext(first, last)) + ythrow yexception() << "IPREG format error: " << first.AsIPv6() << " - " << last.AsIPv6(); +} + +TFlatChecker::TFlatChecker() : HasState(false) { +} + +bool TFlatChecker::CheckNext(const TAddress& first, const TAddress& last) { + bool result = true; + + if (first > last) + result = false; + + if (HasState && first <= PrevLast) + result = false; + + PrevLast = last; + HasState = true; + + return result; +} + +TIntersectingChecker::TIntersectingChecker() : HasState(false) { +} + +bool TIntersectingChecker::CheckNext(const TAddress& first, const TAddress& last) { + bool result = true; + + if (first > last) + result = false; + + if (HasState && (first < PrevFirst || (first == PrevFirst && last < PrevLast))) + result = false; + + PrevFirst = first; + PrevLast = last; + HasState = true; + + return result; +} + +} diff --git a/library/cpp/ipreg/checker.h b/library/cpp/ipreg/checker.h new file mode 100644 index 0000000000..1a04e62e77 
--- /dev/null +++ b/library/cpp/ipreg/checker.h @@ -0,0 +1,37 @@ +#pragma once + +#include "address.h" + +namespace NIPREG { + +class TChecker { +public: + virtual ~TChecker() {} + + virtual bool CheckNext(const TAddress& first, const TAddress& last) = 0; + + void CheckNextFatal(const TAddress& first, const TAddress& last); +}; + +class TFlatChecker: public TChecker { +private: + TAddress PrevLast; + bool HasState; + +public: + TFlatChecker(); + virtual bool CheckNext(const TAddress& first, const TAddress& last); +}; + +class TIntersectingChecker: public TChecker { +private: + TAddress PrevFirst; + TAddress PrevLast; + bool HasState; + +public: + TIntersectingChecker(); + virtual bool CheckNext(const TAddress& first, const TAddress& last); +}; + +} diff --git a/library/cpp/ipreg/merge.cpp b/library/cpp/ipreg/merge.cpp new file mode 100644 index 0000000000..d31e9dce5d --- /dev/null +++ b/library/cpp/ipreg/merge.cpp @@ -0,0 +1,69 @@ +#include "merge.h" + +namespace NIPREG { + +void MergeIPREGS(TReader &a, TReader& b, std::function<void(const TAddress& first, const TAddress& last, const TString *a, const TString *b)>&& proc) { + bool hasA = a.Next(); + bool hasB = b.Next(); + + TAddress top = TAddress::Lowest(); + TAddress bottom; + + do { + // tweak ranges we've passed + if (hasA && top > a.Get().Last) + hasA = a.Next(); + if (hasB && top > b.Get().Last) + hasB = b.Next(); + + if (!hasA && !hasB) { + // both rangesets have ended + bottom = TAddress::Highest(); + proc(top, bottom, nullptr, nullptr); + break; + } + + const bool inA = hasA && a.Get().First <= top; + const bool inB = hasB && b.Get().First <= top; + + if (!hasA) { + // rangeset a has ended + if (inB) { + bottom = b.Get().Last; + proc(top, bottom, nullptr, &b.Get().Data); + } else { + bottom = b.Get().First.Prev(); + proc(top, bottom, nullptr, nullptr); + } + } else if (!hasB) { + // rangeset b has ended + if (inA) { + bottom = a.Get().Last; + proc(top, bottom, &a.Get().Data, nullptr); + } else { + bottom 
= a.Get().First.Prev(); + proc(top, bottom, nullptr, nullptr); + } + } else if (inA && inB) { + // inside both ranges + bottom = Min(a.Get().Last, b.Get().Last); + proc(top, bottom, &a.Get().Data, &b.Get().Data); + } else if (inA) { + // only in range a + bottom = Min(a.Get().Last, b.Get().First.Prev()); + proc(top, bottom, &a.Get().Data, nullptr); + } else if (inB) { + // only in range b + bottom = Min(b.Get().Last, a.Get().First.Prev()); + proc(top, bottom, nullptr, &b.Get().Data); + } else { + // outside both ranges: the gap ends just before whichever range starts first + bottom = Min(a.Get().First.Prev(), b.Get().First.Prev()); + proc(top, bottom, nullptr, nullptr); + } + + top = bottom.Next(); + } while (bottom != TAddress::Highest()); +} + +} diff --git a/library/cpp/ipreg/merge.h b/library/cpp/ipreg/merge.h new file mode 100644 index 0000000000..123b88276c --- /dev/null +++ b/library/cpp/ipreg/merge.h @@ -0,0 +1,11 @@ +#pragma once + +#include "reader.h" + +#include <functional> + +namespace NIPREG { + +void MergeIPREGS(TReader &a, TReader& b, std::function<void(const TAddress& first, const TAddress& last, const TString *a, const TString *b)>&& proc); + +} diff --git a/library/cpp/ipreg/range.cpp b/library/cpp/ipreg/range.cpp new file mode 100644 index 0000000000..1b90022482 --- /dev/null +++ b/library/cpp/ipreg/range.cpp @@ -0,0 +1,198 @@ +#include "range.h" + +#include "util_helpers.h" + +#include <library/cpp/int128/int128.h> +#include <util/generic/maybe.h> +#include <util/string/split.h> +#include <util/string/vector.h> + +#include <stdexcept> + +namespace NIPREG { + +namespace { + EAddressFormat CurrentFormat = EAddressFormat::SHORT_IPV6; + + void throwExceptionWithFormat(const TString& line) { + throw yexception() << "wanted format: ${ip-begin}-${ip-end}[\t${data}]; $input := '" << line << "'"; + } + + void throwIfReverseOrder(TAddress first, TAddress last) { + if (first > last) { + const TString err_msg = "reverse order of addresses (first / last) => " + first.AsIPv6() + " / " + last.AsIPv6(); + throw 
std::runtime_error(err_msg.data()); + } + } +} // anon-ns + +TRange::TRange(TAddress first, TAddress last, const TString& data) + : First(first) + , Last(last) + , Data(data) +{ + throwIfReverseOrder(First, Last); +} + +TRange::TRange(const TNetwork& net, const TString& data) + : TRange(net.begin, net.end, data) +{ +} + +ui128 TRange::GetAddrsQty() const { + return TAddress::Distance(First, Last) + 1; +} + +TRange TRange::BuildRange(const TString& line, bool isEmptyData, const TString& dataDelim) { + const TVector<TString> parts = StringSplitter(line).SplitBySet(dataDelim.data()).SkipEmpty(); + if (parts.empty()) { + throwExceptionWithFormat(line); + } + + if (TString::npos != parts[0].find('/')) { + const auto data = (2 == parts.size()) ? parts[1] : ""; + return TRange(TNetwork(parts[0]), data); + } + + const TVector<TString> range_parts = StringSplitter(parts[0]).SplitBySet(" -\t").SkipEmpty(); + if (2 != range_parts.size() || range_parts[0].empty() || range_parts[1].empty()) { + throwExceptionWithFormat(line); + } + + if (!isEmptyData && (2 != parts.size() || parts[1].empty())) { + throwExceptionWithFormat(line); + } + + const auto& data = (2 == parts.size()) ? 
parts[1] : ""; + return TRange(TAddress::ParseAny(range_parts[0]), TAddress::ParseAny(range_parts[1]), data); +} + +bool TRange::Contains(const TRange& range) const { + return First <= range.First && range.Last <= Last; +} + +bool TRange::Contains(const TAddress& ip) const { + return First <= ip && ip <= Last; +} + +void SetIpFullOutFormat() { + CurrentFormat = EAddressFormat::IPV6; +} + +void SetIpShortOutFormat() { + CurrentFormat = EAddressFormat::SHORT_IPV6; +} + +void TRange::DumpTo(IOutputStream& output, bool withData, EAddressFormat format) const { + output << First.Format(format) << '-' << Last.Format(format); + if (withData) { + output << '\t' << Data; + } +} + +bool TRange::IsIpv6Only() const { + return 6 == First.GetType() && 6 == Last.GetType(); +} + +bool TRange::IsIpv4Only() const { + return 4 == First.GetType() && 4 == Last.GetType(); +} + +bool TRange::IsRangeInSingleNet64() const { + return First.GetHigh64() == Last.GetHigh64(); +} + +TRange TRange::BuildRangeByFirst(const TRange& range, int prefix) { + Y_UNUSED(prefix); + return TRange(TAddress::MakeNet64Prefix(range.First), + TAddress::MakeNet64Broadcast(range.IsRangeInSingleNet64() ? 
range.Last : range.Last.GetPrevNet64()) , + range.Data + ); +} + +TRange TRange::BuildRangeByLast(const TRange& range, int prefix) { + Y_UNUSED(prefix); + const auto prevLast = TAddress::MakeNet64Broadcast(range.Last.GetPrevNet64()); + return TRange(range.First, prevLast, range.Data); +// const auto prevLast = TAddress::MakeNet64Broadcast(range.Last); +// return TRange(TAddress::MakeNet64Prefix(range.First), prevLast, range.Data); +} + +TVector<TRange> SplitRangeNets(const TRange& origRange, bool addOrigSize, int maskLen) { + Y_UNUSED(maskLen); + + static const auto firstCheckedIpv6Prefix = TAddress::ParseAny("2000::"); + + const auto& CalcNetSize = [&](const TRange& range) { + static const auto MAX_FOR_DIGITS_ANSWER = ui128{1 << 30}; + const auto netSize = range.GetAddrsQty(); + return (netSize < MAX_FOR_DIGITS_ANSWER) ? ToString(netSize) : "huge"; + }; + + const auto& AddSizeField = [&](TRange& changedRange, const TRange& origAddrRange) { + if (addOrigSize) { + changedRange.Data = AddJsonAttrs({"orig_net_size"}, changedRange.Data, TMaybe<TString>(CalcNetSize(origAddrRange))); + } + }; + + if (origRange.Last <= firstCheckedIpv6Prefix) { + return {origRange}; + } + + if (origRange.IsRangeInSingleNet64()) { + TRange theOne{ + TAddress::MakeNet64Prefix(origRange.First), + TAddress::MakeNet64Broadcast(origRange.Last), + origRange.Data + }; + AddSizeField(theOne, origRange); + return {theOne}; + } + + TRange range{origRange}; + TVector<TRange> result; { + // 1st + TRange byFirst{TAddress::MakeNet64Prefix(range.First),TAddress::MakeNet64Broadcast(range.First), range.Data}; + AddSizeField(byFirst, {range.First, byFirst.Last, ""}); + result.push_back(byFirst); + + // maybe 2nd + range.First = byFirst.Last.Next(); + if (!range.IsRangeInSingleNet64()) { + const TAddress lastPrefix = TAddress::MakeNet64Prefix(range.Last); + + TRange inTheMiddle{TAddress::MakeNet64Prefix(range.First), lastPrefix.Prev(), range.Data}; + AddSizeField(inTheMiddle, inTheMiddle); + 
result.push_back(inTheMiddle); + + range.First = lastPrefix; + } + + // the last + TRange byLast{range.First, TAddress::MakeNet64Broadcast(range.Last), range.Data}; + AddSizeField(byLast, {byLast.First, range.Last, ""}); + result.push_back(byLast); + } + return result; +} + +bool operator==(const TRange& lhs, const TRange& rhs) { + return lhs.First == rhs.First && lhs.Last == rhs.Last; +} + +} // ns IPREG + +IInputStream& operator>>(IInputStream& input, NIPREG::TRange& range) { + TString line; + if (!input.ReadLine(line)) { + throw std::runtime_error("unable to load data from stream"); + } + range = NIPREG::TRange::BuildRange(line); + return input; +} + +IOutputStream& operator<<(IOutputStream& output, const NIPREG::TRange& range) { + range.DumpTo(output, true, NIPREG::CurrentFormat); + output << "\n"; + return output; +} diff --git a/library/cpp/ipreg/range.h b/library/cpp/ipreg/range.h new file mode 100644 index 0000000000..15b2c693b0 --- /dev/null +++ b/library/cpp/ipreg/range.h @@ -0,0 +1,50 @@ +#pragma once + +#include "address.h" + +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/stream/input.h> +#include <util/stream/output.h> + +#include <stdexcept> + +namespace NIPREG { + +struct TRange { + TAddress First; + TAddress Last; + TString Data; + + TRange() = default; + TRange(TAddress first, TAddress last, const TString& data); + TRange(const TNetwork& net, const TString& data); + + ui128 GetAddrsQty() const; + void DumpTo(IOutputStream& output, bool withData = true, EAddressFormat format = EAddressFormat::SHORT_IP) const; + + static TRange BuildRange(const TString& line, bool isEmptyData = false, const TString& dataDelim = "\t"); + bool Contains(const TRange& range) const; + bool Contains(const TAddress& ip) const; + + static TRange BuildRangeByFirst(const TRange& range, int prefix = 64); + static TRange BuildRangeByLast(const TRange& range, int prefix = 64); + + bool IsIpv6Only() const; + bool IsIpv4Only() const; + + bool 
IsRangeInSingleNet64() const; +}; +using TGenericEntry = TRange; + +void SetIpFullOutFormat(); +void SetIpShortOutFormat(); + +TVector<TRange> SplitRangeNets(const TRange& range, bool addOrigSize = false, int maskLen = 64); + +bool operator==(const TRange& lhs, const TRange& rhs); +inline bool operator!=(const TRange& lhs, const TRange& rhs) { return !(lhs == rhs); } +} // ns NIPREG + +IInputStream& operator>>(IInputStream& input, NIPREG::TRange& range); +IOutputStream& operator<<(IOutputStream& output, const NIPREG::TRange& range); diff --git a/library/cpp/ipreg/reader.cpp b/library/cpp/ipreg/reader.cpp new file mode 100644 index 0000000000..2e4ae1b178 --- /dev/null +++ b/library/cpp/ipreg/reader.cpp @@ -0,0 +1,82 @@ +#include "reader.h" + +#include <util/stream/file.h> + +namespace NIPREG { + +namespace { + const TString DASH_FNAME = "-"; +} + +TReader::TReader(const TString& filename, bool isEmptyData, const TString& dataDelim) + : OwnedStreamPtr((filename.empty() || filename == DASH_FNAME) ? nullptr : new TFileInput(filename)) + , Stream(OwnedStreamPtr ? 
*OwnedStreamPtr.Get() : Cin) + , IsEmptyData(isEmptyData) + , DataDelim(dataDelim) +{ +} + +TReader::TReader(IInputStream& stream, bool isEmptyData, const TString& dataDelim) + : Stream(stream) + , IsEmptyData(isEmptyData) + , DataDelim(dataDelim) +{ +} + +bool TReader::Next() { + TString line; + if (!Stream.ReadLine(line)) + return false; + + CurrentEntry = TRange::BuildRange(line, IsEmptyData, DataDelim); + if (CurrentEntry.Data.empty()) { + if (!IsEmptyData) { + throw yexception() << "empty data part detected for [" << line << "]"; + } + CurrentEntry.Data = ""; + } + return true; +} + +TReverseByLastIpReader::TReverseByLastIpReader(const TString& filename, bool isEmptyData, const TString& dataDelim) + : TParent(filename, isEmptyData, dataDelim) +{ + Valid = TParent::Next(); +} + +TReverseByLastIpReader::TReverseByLastIpReader(IInputStream& stream, bool isEmptyData, const TString& dataDelim) + : TParent(stream, isEmptyData, dataDelim) +{ + Valid = TParent::Next(); +} + +bool TReverseByLastIpReader::Next() { + if (!CurrentEntries.empty()) { + CurrentEntries.pop_back(); + } + + if (CurrentEntries.empty()) { + return PrepareNextEntries(); + } else { + return true; + } +} + +const TGenericEntry& TReverseByLastIpReader::Get() const { + return CurrentEntries.back(); +} + +bool TReverseByLastIpReader::PrepareNextEntries() { + if (!Valid) { + return false; + } + + do { + CurrentEntries.push_back(TParent::Get()); + Valid = TParent::Next(); + } while (Valid && TParent::Get().First == CurrentEntries.back().First); + + return true; +} + +} // NIPREG diff --git a/library/cpp/ipreg/reader.h b/library/cpp/ipreg/reader.h new file mode 100644 index 0000000000..b68faedcf9 --- /dev/null +++ b/library/cpp/ipreg/reader.h @@ -0,0 +1,57 @@ +#pragma once + +#include "range.h" + +#include <util/generic/ptr.h> +#include <util/generic/string.h> +#include <util/stream/input.h> + +namespace NIPREG { + +class TReader { +public: + TReader(const TString& filename = "", bool isEmptyData = false, 
const TString& dataDelim = "\t"); + TReader(IInputStream& stream, bool isEmptyData = false, const TString& dataDelim = "\t"); + + virtual bool Next(); + + virtual const TGenericEntry& Get() const { + return CurrentEntry; + } + + operator IInputStream&() { + return Stream; + } + + virtual ~TReader() = default; + +private: + TAutoPtr<IInputStream> OwnedStreamPtr; + IInputStream& Stream; + + bool IsEmptyData = false; + const TString DataDelim; + + TGenericEntry CurrentEntry; +}; + +class TReverseByLastIpReader : public TReader { +public: + using TParent = TReader; + + explicit TReverseByLastIpReader(const TString& filename = "", bool isEmptyData = false, const TString& dataDelim = "\t"); + explicit TReverseByLastIpReader(IInputStream& stream, bool isEmptyData = false, const TString& dataDelim = "\t"); + + bool Next() override; + + const TGenericEntry& Get() const override; + +private: + bool PrepareNextEntries(); + +private: + bool Valid = false; + TVector<TGenericEntry> CurrentEntries; +}; + +} // NIPREG diff --git a/library/cpp/ipreg/sources.cpp b/library/cpp/ipreg/sources.cpp new file mode 100644 index 0000000000..70e4b2a6da --- /dev/null +++ b/library/cpp/ipreg/sources.cpp @@ -0,0 +1,100 @@ +#include "sources.h" + +#include <cstdint> +#include <stdexcept> + +namespace NIPREG { + +const ui32 ML_COEFF_DEFAULT = 50000; +ui32 ML_COEFFICIENT = ML_COEFF_DEFAULT; + +void SetCoefficient(ui32 type, ui32 value) { + switch (type) { + case SOURCE_ML: + ML_COEFFICIENT = value; + break; + default: + throw std::runtime_error("unsupported setcoeff-type"); + } +} + +double GetSourceCoefficient(ui32 type) { + switch (type) { + case SOURCE_MAIL: return 1; + case SOURCE_PHONE: return 3; + case SOURCE_GEO: return 4; + case SOURCE_COUNTRY: return 100; + case SOURCE_DOMAIN_NAME: return 1; + case SOURCE_MANUAL: return 1; + case SOURCE_YANDEX_NETWORK: return 1000; // NB: in yandex_noc source weight := 10K + case SOURCE_SPECIAL_NETWORK: return 1000000; + case SOURCE_PROVIDERS: return 50; + 
case SOURCE_MAXMIND: return 4; + case SOURCE_UNITED_UID_YANDEX_MAPS: return 0.7; + case SOURCE_RELIABILITY_AROUND: return 1; + case SOURCE_UNITED_UID_WEATHER: return 0.9; + case SOURCE_UNITED_UID_YANDEX_GID: return 1; + case SOURCE_UNITED_UID_SEARCH_QUERY: return 1.5; + case SOURCE_UNITED_UID_SEARCH_IN_REG: return 2; + case SOURCE_BGP_ASPATH_COMMUNITY: return 10; + case SOURCE_ML: return ML_COEFFICIENT; + } + return 0; +} + +bool SourceWantApplyDepthCoeff(ui32 source_type) { + switch (source_type) { + case SOURCE_MAIL: + case SOURCE_PHONE: + case SOURCE_GEO: + case SOURCE_COUNTRY: + case SOURCE_DOMAIN_NAME: + return true; + default: + return false; + } +} + +bool SourceWantApplyNetsizeCoeff(ui32 source_type) { + return SourceWantApplyDepthCoeff(source_type); +} + +bool SourceIsHuman(ui32 source_type) { + switch (source_type) { + case SOURCE_UNITED_UID_SEARCH_QUERY: + case SOURCE_UNITED_UID_SEARCH_IN_REG: + case SOURCE_UNITED_UID_WEATHER: + case SOURCE_UNITED_UID_YANDEX_GID: + case SOURCE_UNITED_UID_YANDEX_MAPS: + return true; + default: + return false; + } +} + +bool SourceIsForRegionNormalize(ui32 source_type) { + return SourceIsHuman(source_type); +} + +bool SourceIsForEnoughHumanData(ui32 source_type) { + switch (source_type) { + case SOURCE_COUNTRY: + case SOURCE_MANUAL: + case SOURCE_PROVIDERS: + case SOURCE_YANDEX_NETWORK: + case SOURCE_SPECIAL_NETWORK: + return true; + default: + return SourceIsHuman(source_type); + } +} + +bool SourceIsForFewHumanData(ui32 source_type) { + return !SourceIsHuman(source_type); +} + +bool SourceIsForReliability(ui32 source_type) { + return SourceIsHuman(source_type) || SOURCE_YANDEX_NETWORK == source_type; +} + +} // NIPREG diff --git a/library/cpp/ipreg/sources.h b/library/cpp/ipreg/sources.h new file mode 100644 index 0000000000..a517e57cb8 --- /dev/null +++ b/library/cpp/ipreg/sources.h @@ -0,0 +1,53 @@ +#pragma once + +#include <util/system/types.h> + +namespace NIPREG { + +// TODO(dieash@) make some 
automation/spicification via enabled sources (with full list) +enum ESourceType { + // TODO(dieash@) full list of known src-types in choice-region-data: + // https://yql.yandex-team.ru/Operations/XEo-amim9Z2_PCkcZgQ0Wu-sqXAm1K8NMPesswuPzbk= + SOURCE_UNKNOWN = 0, // stub + SOURCE_MAIL = 1 /* "MAIL" */, // ripe src + SOURCE_PHONE = 2 /* "PHONE" */, // ripe src + SOURCE_GEO = 3 /* "GEO" */, // ripe src + SOURCE_COUNTRY = 4 /* "COUNTRY" */, // ripe, delegated, maxmind src + SOURCE_DOMAIN_NAME = 5 /* "DOMAIN_NAME" */, // ripe src + SOURCE_MANUAL = 6 /* "MANUAL" */, // manual src + SOURCE_YANDEX_NETWORK = 9 /* "YANDEX_NETWORK" */, // yandex-noc src + SOURCE_SPECIAL_NETWORK = 10 /* "SPECIAL_NETWORK" */, // spec-net src + SOURCE_PROVIDERS = 15 /* "PROVIDERS" */, // ripe src + SOURCE_MAXMIND = 17 /* "MAXMIND" */, // maxmind src + SOURCE_UNITED_UID_YANDEX_MAPS = 19 /* "UNITED_UID_YANDEX_MAPS" */, // uuid src + SOURCE_RELIABILITY_AROUND = 20 /* "RELIABILITY_AROUND" */, // rel-around src + SOURCE_UNITED_UID_WEATHER = 21 /* "UNITED_UID_WEATHER" */, // uuid src + SOURCE_UNITED_UID_YANDEX_GID = 22 /* "UNITED_UID_YANDEX_GID" */, // uuid src + SOURCE_UNITED_UID_SEARCH_QUERY = 23 /* "UNITED_UID_SEARCH_QUERY" */, // uuid src + SOURCE_UNITED_UID_SEARCH_IN_REG = 24 /* "UNITED_UID_SEARCH_IN_REG" */, // uuid src + SOURCE_BGP_ASPATH_COMMUNITY = 25 /* "BGP_ASPATH_COMMUNITY" */, // bgp src // NOTA BENE: clash with https://st.yandex-team.ru/IPREG-3722#5b367ec214778c001a5a3f7c + SOURCE_ML_INT_26 = 26 /* "ML_INT_26" */, + SOURCE_ML_INT_27 = 27 /* "ML_INT_27" */, + SOURCE_ML_INT_28 = 28 /* "ML_INT_28" */, + SOURCE_ML_INT_29 = 29 /* "ML_INT_29" */, + SOURCE_ML_INT_30 = 30 /* "ML_INT_30" */, + SOURCE_ML_INT_31 = 31 /* "ML_INT_31" */, + SOURCE_ML_INT_32 = 32 /* "ML_INT_32" */, + SOURCE_ML_INT_33 = 33 /* "ML_INT_33" */, + SOURCE_ML_INT_34 = 34 /* "ML_INT_34" */, + SOURCE_PRECISE_GEO_ML = 35 /* "ML_INT_35" */, + SOURCE_ML = 36 /* "ML" */, // ml src +}; + +double GetSourceCoefficient(ui32 type); 
+bool SourceWantApplyDepthCoeff(ui32 source_type); +bool SourceWantApplyNetsizeCoeff(ui32 source_type); +bool SourceIsHuman(ui32 source_type); +bool SourceExcludeFromReliability(ui32 source_type); +bool SourceIsForRegionNormalize(ui32 source_type); +bool SourceIsForEnoughHumanData(ui32 source_type); +bool SourceIsForFewHumanData(ui32 source_type); +bool SourceIsForReliability(ui32 source_type); + +void SetCoefficient(ui32 type, ui32 value); +} // namespace NIPREG diff --git a/library/cpp/ipreg/split.cpp b/library/cpp/ipreg/split.cpp new file mode 100644 index 0000000000..19b7b85d51 --- /dev/null +++ b/library/cpp/ipreg/split.cpp @@ -0,0 +1,54 @@ +#include "split.h" + +#include <util/generic/list.h> +#include <util/generic/vector.h> + +namespace NIPREG { + +void SplitIPREG(TReader &reader, std::function<void(const TAddress& first, const TAddress& last, const TVector<TString>& data)>&& proc) { + TList<TGenericEntry> prevEntries; + + bool end; + do { + end = !reader.Next(); + + while (!prevEntries.empty() && (end || prevEntries.front().First < reader.Get().First)) { + // find smallest common range to process + TAddress first = prevEntries.front().First; + TAddress last = end ? 
TAddress::Highest() : reader.Get().First.Prev(); + + for (const auto& entry: prevEntries) + last = Min(last, entry.Last); + + // extract data for the range + TVector<TString> strings; + auto item = prevEntries.begin(); + while (item != prevEntries.end()) { + Y_ASSERT(item->First == first); + strings.push_back(item->Data); + + if (item->Last == last) { + // item completely processed, remove + auto victim = item; + item++; + prevEntries.erase(victim); + } else { + // item still have part of range left, update it + item->First = last.Next(); + item++; + } + } + + proc(first, last, strings); + } + + if (!end) { + if (!prevEntries.empty()) { + Y_ASSERT(prevEntries.front().First == reader.Get().First); + } + prevEntries.push_back(reader.Get()); + } + } while (!end); +} + +} diff --git a/library/cpp/ipreg/split.h b/library/cpp/ipreg/split.h new file mode 100644 index 0000000000..9710ff5f6d --- /dev/null +++ b/library/cpp/ipreg/split.h @@ -0,0 +1,13 @@ +#pragma once + +#include "reader.h" + +#include <util/generic/vector.h> + +#include <functional> + +namespace NIPREG { + +void SplitIPREG(TReader &reader, std::function<void(const TAddress& first, const TAddress& last, const TVector<TString>& data)>&& proc); + +} diff --git a/library/cpp/ipreg/stopwatch.cpp b/library/cpp/ipreg/stopwatch.cpp new file mode 100644 index 0000000000..31d99d2758 --- /dev/null +++ b/library/cpp/ipreg/stopwatch.cpp @@ -0,0 +1,53 @@ +#include "stopwatch.h" + +#include <util/stream/str.h> + +namespace NIPREG { + +TStopWatch::TStopWatch() { + Start = TInstant::Now(); +} + +TStopWatch::~TStopWatch() { + try { + if (TaskRunning) + StopTask(); + + Cerr << "Everything done in " << FormatTime(TInstant::Now() - Start) << Endl; + } catch (...) { + // not much problem if we can't write the summary + } +} + +void TStopWatch::StartTask(const TString& message) { + StopTask(); + + ++TaskOrdNum; + TaskStart = TInstant::Now(); + TaskRunning = true; + Cerr << TaskOrdNum << ". 
" << message << "...\n"; +} + +void TStopWatch::StopTask() { + if (TaskRunning) { + Cerr << "Done in " << FormatTime(TInstant::Now() - TaskStart) << Endl; + TaskRunning = false; + } +} + +TString TStopWatch::FormatTime(const TDuration& dur) { + auto sec = dur.Seconds(); + + TStringStream ss; + + if (sec < 60) + ss << sec << "s"; + else if (sec < 3600) + ss << sec / 60 << "m " << sec % 60 << "s"; + else + ss << sec / 3600 << "h " << (sec / 60) % 60 << "m"; + + return ss.Str(); +} + +} diff --git a/library/cpp/ipreg/stopwatch.h b/library/cpp/ipreg/stopwatch.h new file mode 100644 index 0000000000..0873a638f6 --- /dev/null +++ b/library/cpp/ipreg/stopwatch.h @@ -0,0 +1,25 @@ +#pragma once + +#include <util/datetime/base.h> + +namespace NIPREG { + +class TStopWatch { +private: + TInstant Start; + TInstant TaskStart; + bool TaskRunning = false; + ui32 TaskOrdNum = 0; + +private: + TString FormatTime(const TDuration& dur); + +public: + TStopWatch(); + ~TStopWatch(); + + void StartTask(const TString& message); + void StopTask(); +}; + +} diff --git a/library/cpp/ipreg/util_helpers.cpp b/library/cpp/ipreg/util_helpers.cpp new file mode 100644 index 0000000000..1b64baef55 --- /dev/null +++ b/library/cpp/ipreg/util_helpers.cpp @@ -0,0 +1,705 @@ +#include "util_helpers.h" + +#include <library/cpp/ipreg/reader.h> + +#include <library/cpp/json/json_reader.h> +#include <library/cpp/json/json_value.h> +#include <library/cpp/json/json_writer.h> + +#include <library/cpp/geobase/lookup.hpp> + +#include <util/generic/ptr.h> +#include <util/generic/vector.h> +#include <util/stream/file.h> +#include <util/stream/format.h> +#include <util/string/split.h> +#include <util/string/vector.h> +#include <util/stream/str.h> + +namespace NIPREG { + namespace { + double FindNearestCoarsedCoeff(double baseValue) { + using ValueStepPair = std::pair<double, double>; + static const double fix = 0.01; + static const TVector<ValueStepPair> limits = { + { 100., 20. + fix }, + { 500., 50. 
+ fix }, + { 2500., 100. + fix }, + { 10000., 1000. + fix }, + { 50000., 10000. + fix } + }; + + double last_step{}; + for (const auto& pair : limits) { + last_step = pair.second; + if (baseValue <= pair.first) { + break; + } + } + return last_step; + } + + double CalcCoarsedValue(double baseValue) { + if (baseValue < 0.) { + ythrow yexception() << "negative value detected: " << baseValue; + } + + // TODO(dieash) some "strange" calculation below + const auto coarsedCoeff = FindNearestCoarsedCoeff(baseValue); + const double fixedValue = coarsedCoeff * static_cast<int>((baseValue + coarsedCoeff / 2) / coarsedCoeff); + return fixedValue; + } + + const char * const REL_FIELD = "reliability"; + const char * const REG_FIELD = "region_id"; + + void CorrectReliability(NJson::TJsonValue& jsonData, const TString& data) { + jsonData = ParseJsonString(data); + auto& jsonMap = jsonData.GetMapSafe(); + + auto& reliabilityField = jsonMap[REL_FIELD]; + reliabilityField = CalcCoarsedValue(reliabilityField.GetDouble()); + } + + TString SortJson(const TString& data) { + NJson::TJsonValue json = ParseJsonString(data); + return SortJsonData(json); + } + + static TString MergeJsonsData(const TString& data1, const TString& data2, bool sortKeys = false, bool countMerge = false) { + static const char* MERGE_QTY = "_mrg_qty_"; + + auto json1 = ParseJsonString(data1); + const auto& json2 = ParseJsonString(data2); + + if (countMerge && !json1.Has(MERGE_QTY)) { + json1.InsertValue(MERGE_QTY, 1); + } + + for (const auto& item : json2.GetMapSafe()) { + json1.InsertValue(item.first, item.second); + } + + if (countMerge) { + json1.InsertValue(MERGE_QTY, (json1[MERGE_QTY].GetInteger() + 1)); + } + + const auto NoFormat = false; + return NJson::WriteJson(json1, NoFormat, sortKeys); + } + + bool IsJsonEquals(const TVector<TString>& excludeFieldsList, const TString& data1, const TString& data2) { + if (excludeFieldsList.empty()) { + return data1 == data2; + } + + auto json1 = ParseJsonString(data1); + 
auto json2 = ParseJsonString(data2); + + for (const auto& excludeField : excludeFieldsList) { + json1.EraseValue(excludeField); + json2.EraseValue(excludeField); + } + + return json1 == json2; + } + + class Patcher { + public: + Patcher(TReader& base, TReader& patch, IOutputStream& output, bool sortData) + : BaseStream(base) + , PatchStream(patch) + , Output(output) + , SortData(sortData) + { + GetNext(BaseStream, BaseRangePtr); + GetNext(PatchStream, PatchRangePtr); + } + + void Process() { + while (BaseRangePtr || PatchRangePtr) { + if ( CheckPatch() + || OnlySecond(BaseRangePtr, PatchRangePtr, PatchStream) + || OnlySecond(PatchRangePtr, BaseRangePtr, BaseStream) + || Range1BeforeRange2(BaseRangePtr, PatchRangePtr, BaseStream) + || Range1BeforeRange2(PatchRangePtr, BaseRangePtr, PatchStream) + || FirstEndInSecond(BaseRangePtr, PatchRangePtr) + || FirstEndInSecond(PatchRangePtr, BaseRangePtr) + || FirstStartInSecond(BaseRangePtr, PatchRangePtr, BaseStream, PatchStream)) + { + continue; + } + } + } + + private: + void GetNext(TReader& stream, TAutoPtr<TRange>& rangePtr) { + if (stream.Next()) { + if (rangePtr) { + *rangePtr = stream.Get(); + } else { + rangePtr.Reset(new TRange(stream.Get())); + } + } + else { + rangePtr.Reset(); + } + } + + void Print(const TRange& range) const { + Output << range; + } + + void PrintSorted(const TRange& range) const { + const TRange sortedCopy{range.First, range.Last, SortJson(range.Data)}; + Output << sortedCopy; + } + + bool CheckPatch() { + if (PatchRangePtr && PatchRangePtr->First > PatchRangePtr->Last) { + GetNext(PatchStream, PatchRangePtr); + return true; + } + return false; + } + + bool OnlySecond(TAutoPtr<TRange>& first, TAutoPtr<TRange>& second, TReader& stream) { + if (!first && second) { + Print(*second); + GetNext(stream, second); + return true; + } + return false; + } + + bool Range1BeforeRange2(TAutoPtr<TRange>& first, TAutoPtr<TRange>& second, TReader& stream) { + if (first->Last < second->First) { + Print(*first); 
+ GetNext(stream, first); + return true; + } + return false; + } + + bool FirstEndInSecond(TAutoPtr<TRange>& first, TAutoPtr<TRange>& second) { + if (first->First < second->First) { + auto leftBaseRange = *first; + leftBaseRange.Last = second->First.Prev(); + Print(leftBaseRange); + + first->First = second->First; + return true; + } + return false; + } + + bool FirstStartInSecond(TAutoPtr<TRange>& first, TAutoPtr<TRange>& second, TReader& stream1, TReader& stream2) { + if (first->First >= second->First) { + auto leftBaseRange = *first; + leftBaseRange.Data = MergeJsonsData(first->Data, second->Data); + + if (first->Last <= second->Last) { + second->First = first->Last.Next(); + GetNext(stream1, first); + if (second->First == TAddress::Highest()) { + GetNext(stream2, second); + } + } else { + leftBaseRange.Last = second->Last; + first->First = second->Last.Next(); + GetNext(stream2, second); + } + + SortData ? PrintSorted(leftBaseRange) : Print(leftBaseRange); + return true; + } + return false; + } + + private: + TAutoPtr<TRange> BaseRangePtr; + TAutoPtr<TRange> PatchRangePtr; + + TReader& BaseStream; + TReader& PatchStream; + IOutputStream& Output; + const bool SortData = false; + }; + + struct IpChecker { + static void LessOrEqual(const size_t row, const TAddress& lastIp, const TAddress& checkedIp) { + if (lastIp <= checkedIp) { + return; + } + GenErr(row, " <= ", lastIp, checkedIp); + } + + static void Less(const size_t row, const TAddress& lastIp, const TAddress& checkedIp) { + if (lastIp < checkedIp) { + return; + } + GenErr(row, " < ", lastIp, checkedIp); + } + + static void GenErr(const size_t row, const char* msg, const TAddress& lastIp, const TAddress& checkedIp) { + const TString& errMsg = ">>> row#" + ToString(row) + "; " + lastIp.AsIPv6() + msg + checkedIp.AsIPv6(); + throw std::runtime_error(errMsg.data()); + } + }; + + class MergerBy3 { + public: + MergerBy3(const TString& geodataPath, IOutputStream& output) + : Geobase(geodataPath) + , Out(output) + 
{} + + void Process(TReader& input, bool ByRegsOnly, bool silentMode) { + while (input.Next()) { + Trio.push_back(input.Get()); + if (3 > Trio.size()) { + continue; + } + + auto& range2Data = (++Trio.begin())->Data; + if (range2Data.npos != range2Data.find("\"is_placeholder\":1")) { + PrintAndDrop1stRange(); + PrintAndDrop1stRange(); + continue; + } + + const auto range1RegId = GetRegionId(Trio.begin()->Data); + const auto range3RegId = GetRegionId(Trio.rbegin()->Data); + if (range1RegId != range3RegId) { + PrintAndDrop1stRange(); + continue; + } + + const auto range2RegId = GetRegionId(range2Data); + const auto& parentsIds = Geobase.GetParentsIds(range1RegId); + if (parentsIds.end() == std::find(parentsIds.begin() + 1, parentsIds.end(), range2RegId)) { + PrintAndDrop1stRange(); + continue; + } + + if (!ByRegsOnly) { + const auto range1Size = Trio.begin()->GetAddrsQty(); + const auto range2Size = (++Trio.begin())->GetAddrsQty(); + const auto range3Size = Trio.rbegin()->GetAddrsQty(); + + if (range2Size > (range1Size + range3Size)) { + PrintAndDrop1stRange(); + continue; + } + } + + range2Data = SubstRegionId(range2Data, range1RegId); + if (!silentMode) { + PrintSubstNote(range2RegId, range1RegId); + } + + PrintAndDrop1stRange(); // 1st + PrintAndDrop1stRange(); // 2nd + } + + while (Trio.end() != Trio.begin()) { + PrintAndDrop1stRange(); + } + } + private: + void PrintAndDrop1stRange() { + Out << *Trio.begin(); + Trio.erase(Trio.begin()); + } + + void PrintSubstNote(const int oldId, const int newId) { + const bool NoData = false; + Cerr << "s/" << oldId << "/" << newId << "/: ["; + + Trio.begin()->DumpTo(Cerr, NoData); + Cerr << "/" << Trio.begin()->GetAddrsQty() << " | "; + + const auto& range2nd = *(++Trio.begin()); + range2nd.DumpTo(Cerr, NoData); + Cerr << "/" << range2nd.GetAddrsQty() << " | "; + + Trio.rbegin()->DumpTo(Cerr, NoData); + Cerr << "/" << Trio.rbegin()->GetAddrsQty() << "]\n"; + } + + + static int GetRegionId(const TString& data) { + const auto& 
json = ParseJsonString(data); + auto reg_id = json["region_id"].GetIntegerSafe(0); + return 99999 == reg_id ? 10000 : reg_id; + } + + static TString SubstRegionId(const TString& data, const int newId) { + auto json = ParseJsonString(data); + json.InsertValue("region_id", newId); + return SortJsonData(json); + } + + const NGeobase::TLookup Geobase; + IOutputStream& Out; + TList<TRange> Trio; + }; + } // anon-ns + + void DoCoarsening(IInputStream& input, IOutputStream& output) { + TString line; + while (input.ReadLine(line)) { + TVector<TString> parts; + StringSplitter(line).Split('\t').AddTo(&parts); + + NJson::TJsonValue jsonData; + CorrectReliability(jsonData, parts[1]); + output << parts[0] << "\t" << "{\"" + << REG_FIELD << "\":" << jsonData[REG_FIELD] << ",\"" + << REL_FIELD << "\":" << Prec(jsonData[REL_FIELD].GetDouble(), PREC_POINT_DIGITS_STRIP_ZEROES, 2) + << "}\n"; + } + } + + void DoMergeEqualsRange(TReader& input, IOutputStream& output) { + // TODO(dieash@) may be check region for parent/child relation + // , const TString& geodataPath + // NGeobase::TLookup geoLookup(geodataPath); + + TVector<TString> rangeDataList; + TRange lastRange{}; + + const char* REG_ID_ATTR = "region_id"; + const char* ORG_NET_ATTR = "orig_net_size"; + const char* HUGE_SIZE_VALUE = "huge"; + + const int HUGE_SIZE_COEFF = 100; + + const auto CalcRegionBinding = [&]() { + if (rangeDataList.empty()) { + throw std::runtime_error("empty data list"); + } + + if (1 == rangeDataList.size()) { + return rangeDataList[0]; + } + + size_t maxAmount{}; + NJson::TJsonValue maxData; + + THashMap<NGeobase::TId, size_t> reg2amount; + for (const auto& data : rangeDataList) { + const auto& json = ParseJsonString(data); + + const auto id = json[REG_ID_ATTR].GetInteger(); + const auto amount = (json.Has(ORG_NET_ATTR) && HUGE_SIZE_VALUE == json[ORG_NET_ATTR].GetString()) ? 
HUGE_SIZE_COEFF : FromString<int>(json[ORG_NET_ATTR].GetString()); + reg2amount[id] += amount; + + if (reg2amount[id] > maxAmount) { + maxData = json; + } + } + + maxData.EraseValue(ORG_NET_ATTR); + return SortJsonData(maxData); + }; + + const auto PrintRow = [&]() { + if (rangeDataList.empty()) { + return; + } + lastRange.Data = CalcRegionBinding(); + output << lastRange; + }; + + while (input.Next()) { + auto currRange = input.Get(); + if (currRange != lastRange) { + PrintRow(); + + lastRange = currRange; + rangeDataList = {}; + } + + rangeDataList.push_back(currRange.Data); + } + PrintRow(); + } + + void DoMerging(TReader& input, IOutputStream& output, const MergeTraits& traits) { + if (!input.Next()) { + return; // empty file here + } + + const bool IsJsonData = traits.ConcatSep.empty(); + + TRange joinedRange = input.Get(); + if (traits.SortData) { + joinedRange.Data = SortJson(joinedRange.Data); + } + + while (input.Next()) { + auto currRange = input.Get(); + if (traits.SortData) { + currRange.Data = SortJson(currRange.Data); + } + + if (currRange.Contains(joinedRange) && joinedRange.Data == currRange.Data) { + joinedRange = currRange; + continue; + } + + if (traits.JoinNestedRanges && joinedRange.Contains(currRange) && joinedRange.Data == currRange.Data) { + continue; + } + + if ( currRange.First != joinedRange.Last.Next() + || ( IsJsonData && !IsJsonEquals(traits.ExcludeFieldsList, currRange.Data, joinedRange.Data)) + || (!IsJsonData && currRange.Data != joinedRange.Data)) + { + output << joinedRange; + joinedRange = currRange; + } else { + if (IsJsonData) { + joinedRange.Data = MergeJsonsData(currRange.Data, joinedRange.Data, traits.SortData, traits.CountMerges); + } else { + joinedRange.Data = (joinedRange.Data == currRange.Data) ? 
joinedRange.Data : (joinedRange.Data + traits.ConcatSep + currRange.Data); + } + joinedRange.Last = currRange.Last; + } + } + + output << joinedRange; + } + + void DoMerging3(TReader& input, IOutputStream& output, const TString& geodata, bool ByRegsOnly, bool silentMode) { + MergerBy3 merger(geodata, output); + merger.Process(input, ByRegsOnly, silentMode); + } + + void DoPatching(TReader& base, TReader& patch, IOutputStream& output, bool sortData) { + Patcher(base, patch, output, sortData).Process(); + } + + const TString STUB_DATA{"{\"is_placeholder\":1,\"region_id\":10000,\"reliability\":0}"}; + + void AddStubRanges(TReader& input, IOutputStream& output) { + TRange stub{ + TAddress::Lowest(), + TAddress::Lowest(), + STUB_DATA + }; + + while (input.Next()) { + const auto& currRange = input.Get(); + + if (stub.First > currRange.First) { + const TString& errMsg = ">>> bad ranges ($stub.begin > $next.begin) // " + stub.First.AsShortIPv6() + " | " + currRange.First.AsShortIPv6(); + throw std::runtime_error(errMsg.data()); + } + + if (stub.First < currRange.First) { + stub.Last = currRange.First.Prev(); + output << stub; + } + + output << currRange; + stub.First = currRange.Last.Next(); + } + + if (stub.First != TAddress::Highest()) { + stub.Last = TAddress::Highest(); + output << stub; + } + } + + void CheckAddressSpaceForCompleteness(IInputStream& input, IOutputStream& output) { + TAddress lastIp = TAddress::Lowest(); + size_t row_number = 0; + + TString line; + while (input.ReadLine(line)) { + ++row_number; + output << line << "\n"; + + const auto& currRange = TRange::BuildRange(line); + if (row_number == 1) { + if (currRange.First != TAddress::Lowest()) { + const TString err_msg = "bad first addr (ip / wanted_ip) => " + currRange.First.AsIPv6() + " / " + TAddress::Lowest().AsIPv6(); + throw std::runtime_error(err_msg); + } + lastIp = currRange.Last; + continue; + } + + if (lastIp == currRange.First || lastIp.Next() != currRange.First) { + const TString err_msg = 
">>> row#" + ToString(row_number) + " bad pair (last_ip / next_ip) => " + lastIp.AsIPv6() + " / " + currRange.First.AsIPv6(); + throw std::runtime_error(err_msg); + } + + lastIp = currRange.Last; + } + + if (lastIp != TAddress::Highest()) { + const TString err_msg = "bad last addr (last_ip / wanted_ip) => " + lastIp.AsIPv6() + " / " + TAddress::Highest().AsIPv6(); + throw std::runtime_error(err_msg); + } + } + + void CheckRangesForMonotonicSequence(IInputStream& input, IOutputStream& output, bool IsStrict) { + TAddress lastIp = TAddress::Lowest(); + + size_t row = 0; + TString line; + while (input.ReadLine(line)) { + ++row; + output << line << "\n"; + + const auto& currRange = TRange::BuildRange(line); + if (row == 1) { + lastIp = currRange.Last; + continue; + } + + if (IsStrict) { + IpChecker::Less(row, lastIp, currRange.First); + } else { + IpChecker::LessOrEqual(row, lastIp, currRange.First); + } + lastIp = currRange.Last; + } + } + + NJson::TJsonValue ParseJsonString(const TString& data) { + const auto throwIfError = true; + + NJson::TJsonValue json; + NJson::ReadJsonFastTree(data, &json, throwIfError); + return json; + } + + TString SortJsonData(const NJson::TJsonValue& json) { + const auto NoFormat = false; + const auto SortKeys = true; + + return NJson::WriteJson(json, NoFormat, SortKeys); + } + + TString SortJsonData(const TString& jsonStr) { + return SortJsonData(ParseJsonString(jsonStr)); + } + + TString AddJsonAttrs(const TVector<TString>& addFieldsList, const TString& jsonStr, const TMaybe<TString>& attrValue) { + if (addFieldsList.empty()) { + return jsonStr; + } + + auto json = ParseJsonString(jsonStr); + for (const auto& newField : addFieldsList) { + if (!newField.empty()) { + if (attrValue) { + json.InsertValue(newField, *attrValue); + } else { + json.InsertValue(newField, 1); + } + } + } + return json.GetStringRobust(); + } + + TString ExcludeJsonAttrs(const TVector<TString>& excludeFieldsList, const TString& jsonStr) { + if 
(excludeFieldsList.empty()) { + return jsonStr; + } + + auto json = ParseJsonString(jsonStr); + for (const auto& excludeField : excludeFieldsList) { + if (!excludeField.empty()) { + json.EraseValue(excludeField); + } + } + return json.GetStringRobust(); + } + + TString ExtractJsonAttrs(const TVector<TString>& extractFieldsList, const TString& jsonStr) { + if (extractFieldsList.empty()) { + return jsonStr; + } + + auto json = ParseJsonString(jsonStr); + NJson::TJsonValue newJson; + for (const auto& field : extractFieldsList) { + if (json.Has(field)) { + newJson.InsertValue(field, json[field]); + } + } + if (!newJson.IsDefined()) { + return {}; + } + return newJson.GetStringRobust(); + } + + namespace CliParamsDesc { + const TString InputFnameParam = "input-data"; + const TString OutputFnameParam = "output-data"; + const TString OutputFullIpParam = "show-full-ip"; + const TString PrintStatsParam = "print-stats"; + const TString PrintYtStatsParam = "yt-stats"; + + const TString InputFnameParamDesc = "path to input IPREG-data; leave empty or use '-' for stdin"; + const TString OutputFnameParamDesc = "path to file for output results; leave empty for stdout"; + const TString OutputFullIpParamDesc = "print full ipv6 (by default - short)"; + const TString PrintStatsParamDesc = "print internal statistics; @stderr"; + const TString PrintYtStatsParamDesc = "print YT-stats (by default, file-descriptor 5)"; + } // ns CliParamsDesc + + DefaultCliParams::DefaultCliParams() { + using namespace CliParamsDesc; + + Opts.SetFreeArgsMax(0); + Opts.AddHelpOption('h'); + + Opts.AddLongOption('i', InputFnameParam) + .RequiredArgument("filename") + .DefaultValue(InputFname) + .StoreResult(&InputFname).Help(InputFnameParamDesc); + + Opts.AddLongOption('o', OutputFnameParam) + .RequiredArgument("filename") + .DefaultValue(OutputFname) + .StoreResult(&OutputFname).Help(OutputFnameParamDesc); + + Opts.AddLongOption('f', OutputFullIpParam) + .Optional() + .NoArgument() + .DefaultValue("0") + 
.OptionalValue("1") + .StoreResult(&OutputFullIp).Help(OutputFullIpParamDesc); + + Opts.AddLongOption(PrintStatsParam) + .Optional() + .NoArgument() + .DefaultValue("0") + .OptionalValue("1") + .StoreResult(&PrintStats).Help(PrintStatsParamDesc); + + Opts.AddLongOption(PrintYtStatsParam) + .Optional() + .NoArgument() + .DefaultValue("0") + .OptionalValue("1") + .StoreResult(&PrintYtStats).Help(PrintYtStatsParamDesc); + } + + void DefaultCliParams::ApplyFlags() const { + if (OutputFullIp) { + SetIpFullOutFormat(); + } + } + + void DefaultCliParams::Parse(int argc, const char **argv) { + NLastGetopt::TOptsParseResult optRes(&GetOpts(), argc, argv); + ApplyFlags(); + } + +} // NIPREG diff --git a/library/cpp/ipreg/util_helpers.h b/library/cpp/ipreg/util_helpers.h new file mode 100644 index 0000000000..eab2dfb320 --- /dev/null +++ b/library/cpp/ipreg/util_helpers.h @@ -0,0 +1,65 @@ +#pragma once + +#include <library/cpp/getopt/opt.h> +#include <util/generic/string.h> +#include <util/generic/maybe.h> + +class IInputStream; +class IOutputStream; + +namespace NJson { + class TJsonValue; +} + +namespace NIPREG { + class TReader; + + // @input any form of range+payload + // @output $ip.begin-$ip.end \t {"region_id":$reg,"reliability":$rel} + void DoCoarsening(IInputStream& input, IOutputStream& output); + + struct MergeTraits { + const TVector<TString> ExcludeFieldsList; + TString ConcatSep; + bool SortData{}; + bool CountMerges{}; + bool JoinNestedRanges{}; + }; + + void DoMerging(TReader& input, IOutputStream& output, const MergeTraits& traits); + void DoMerging3(TReader& input, IOutputStream& output, const TString& geodata, bool ByRegsOnly = false, bool silentMode = false); + void DoMergeEqualsRange(TReader& input, IOutputStream& output); + + void DoPatching(TReader& base, TReader& patch, IOutputStream& output, bool sortData = false); + + void AddStubRanges(TReader& input, IOutputStream& output); + + void CheckAddressSpaceForCompleteness(IInputStream& input, 
IOutputStream& output); + void CheckRangesForMonotonicSequence(IInputStream& input, IOutputStream& output, bool IsStrict = false); + + NJson::TJsonValue ParseJsonString(const TString& data); + TString SortJsonData(const NJson::TJsonValue& json); + TString SortJsonData(const TString& json); + + TString AddJsonAttrs(const TVector<TString>& addFieldsList, const TString& jsonStr, const TMaybe<TString>& attrValue); + TString ExcludeJsonAttrs(const TVector<TString>& excludeFieldsList, const TString& jsonStr); + TString ExtractJsonAttrs(const TVector<TString>& excludeFieldsList, const TString& jsonStr); + + extern const TString STUB_DATA; + + struct DefaultCliParams { + DefaultCliParams(); + + NLastGetopt::TOpts& GetOpts() { return Opts; } + void Parse(int argc, const char **argv); + void ApplyFlags() const; + + TString InputFname = "-"; + TString OutputFname = ""; + bool OutputFullIp = false; + bool PrintStats = false; + bool PrintYtStats = false; + + NLastGetopt::TOpts Opts; + }; +} // NIPREG diff --git a/library/cpp/ipreg/writer.cpp b/library/cpp/ipreg/writer.cpp new file mode 100644 index 0000000000..89f8c8b629 --- /dev/null +++ b/library/cpp/ipreg/writer.cpp @@ -0,0 +1,91 @@ +#include "writer.h" + +#include <util/stream/file.h> + +namespace NIPREG { + +TWriter::TWriter(const TString& fname) + : OwnedStreamPtr(fname.empty() ? nullptr : new TFileOutput(fname)) + , Stream(OwnedStreamPtr ? 
*OwnedStreamPtr.Get() : Cout) + , AddrSeparator(ADDR_SEP) + , DataSeparator(DATA_SEP) + , SplitMixed(false) +{ +} + +TWriter::TWriter(IOutputStream& stream, EAddressFormat addressFormat, const TString& addrSep, const TString& dataSep, const bool splitMixed) + : Stream(stream) + , AddressFormat(addressFormat) + , AddrSeparator(addrSep) + , DataSeparator(dataSep) + , SplitMixed(splitMixed) +{ +} + +namespace { + const TAddress IPv4Start = TAddress::ParseIPv4("0.0.0.0"); + const TAddress IPv4End = TAddress::ParseIPv4("255.255.255.255"); + + const TAddress IPv6BeforeV4 = IPv4Start.Prev(); + const TAddress IPv6AfterV4 = IPv4End.Next(); +} + +void TWriter::Write(const TAddress& first, const TAddress& last, const TString& data, bool printRange) { + if (SplitMixed) { + if (first < IPv4Start && IPv4Start < last) { + Write(first, IPv6BeforeV4, data, printRange); + Write(IPv4Start, last, data, printRange); + return; + } + + if (first < IPv4End && IPv4End < last) { + Write(first, IPv4End, data, printRange); + Write(IPv6AfterV4, last, data, printRange); + return; + } + } + WriteImpl(first, last, data, printRange); +} + +void TWriter::WriteImpl(const TAddress& first, const TAddress& last, const TString& data, bool printRange) { + if (printRange) { + Stream << first.Format(AddressFormat) << AddrSeparator << last.Format(AddressFormat); + } + if (!data.empty()) { + if (printRange) { + Stream << DataSeparator; + } + Stream << data; + } + if (!data.empty() || printRange) { + Stream << "\n"; + } +} + +void TWriter::Finalize() { +} + +TMergingWriter::TMergingWriter(IOutputStream& stream, EAddressFormat addressFormat, const TString& addrSep, const TString& dataSep, const bool splitMixed) + : TWriter(stream, addressFormat, addrSep, dataSep, splitMixed) { +} + +void TMergingWriter::Write(const TAddress& first, const TAddress& last, const TString& data, bool) { + if (Initialized && data == StoredData && first == StoredLast.Next()) { + StoredLast = last; + } else { + if (Initialized) + 
TWriter::Write(StoredFirst, StoredLast, StoredData); + StoredFirst = first; + StoredLast = last; + StoredData = data; + Initialized = true; + } +} + +void TMergingWriter::Finalize() { + if (Initialized) + TWriter::Write(StoredFirst, StoredLast, StoredData); + Initialized = false; +} + +} // NIPREG diff --git a/library/cpp/ipreg/writer.h b/library/cpp/ipreg/writer.h new file mode 100644 index 0000000000..a4232a89a6 --- /dev/null +++ b/library/cpp/ipreg/writer.h @@ -0,0 +1,62 @@ +#pragma once + +#include "range.h" + +#include <util/generic/ptr.h> +#include <util/generic/string.h> +#include <util/stream/output.h> + +namespace NIPREG { + +class TWriter { +public: + static constexpr char const * const ADDR_SEP = "-"; + static constexpr char const * const DATA_SEP = "\t"; + +public: + TWriter(const TString& filename = ""); + TWriter(IOutputStream& stream, EAddressFormat addressFormat = EAddressFormat::IPV6, const TString& addrSep = ADDR_SEP, const TString& dataSep = DATA_SEP, const bool splitMixed = false); + TWriter(IOutputStream& stream, const TString& addrSep, EAddressFormat addressFormat) + : TWriter(stream, addressFormat, addrSep, addrSep) + {} + virtual ~TWriter() {} + + void Write(const TGenericEntry& entry, bool printRange = true) { + Write(entry.First, entry.Last, entry.Data, printRange); + } + virtual void Write(const TAddress& first, const TAddress& last, const TString& data, bool printRange = true); + virtual void Finalize(); + + operator IOutputStream&() { + return Stream; + } + +private: + void WriteImpl(const TAddress& first, const TAddress& last, const TString& data, bool printRange); + + TAutoPtr<IOutputStream> OwnedStreamPtr; + IOutputStream& Stream; + + EAddressFormat AddressFormat = EAddressFormat::IPV6; + const TString AddrSeparator = ADDR_SEP; + const TString DataSeparator = DATA_SEP; + const bool SplitMixed; +}; + +class TMergingWriter : public TWriter { +public: + TMergingWriter(IOutputStream& stream, EAddressFormat addressFormat = 
EAddressFormat::IPV6, const TString& addrSep = ADDR_SEP, const TString& dataSep = DATA_SEP, const bool splitMixed = false); + TMergingWriter(IOutputStream& stream, const TString& addrSep, EAddressFormat addressFormat) + : TWriter(stream, addressFormat, addrSep, addrSep) + {} + void Write(const TAddress& first, const TAddress& last, const TString& data, bool printRange = true) final override; + void Finalize() final; + +private: + TAddress StoredFirst; + TAddress StoredLast; + TString StoredData; + bool Initialized = false; +}; + +} // NIPREG diff --git a/library/cpp/ipreg/ya.make b/library/cpp/ipreg/ya.make new file mode 100644 index 0000000000..b03720f761 --- /dev/null +++ b/library/cpp/ipreg/ya.make @@ -0,0 +1,26 @@ +LIBRARY() + +SRCS( + address.cpp + checker.cpp + merge.cpp + range.cpp + reader.cpp + sources.cpp + split.cpp + stopwatch.cpp + writer.cpp + util_helpers.cpp +) + +PEERDIR( + library/cpp/getopt/small + library/cpp/json + library/cpp/geobase + library/cpp/int128 +) + +GENERATE_ENUM_SERIALIZATION(address.h) +GENERATE_ENUM_SERIALIZATION(sources.h) + +END() diff --git a/library/cpp/langmask/CMakeLists.txt b/library/cpp/langmask/CMakeLists.txt new file mode 100644 index 0000000000..499930c4b0 --- /dev/null +++ b/library/cpp/langmask/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(proto) diff --git a/library/cpp/langmask/proto/CMakeLists.darwin-x86_64.txt b/library/cpp/langmask/proto/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..e9f692d0f2 --- /dev/null +++ b/library/cpp/langmask/proto/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,43 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(cpp-langmask-proto) +target_link_libraries(cpp-langmask-proto PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(cpp-langmask-proto PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/langmask/proto/langmask.proto +) +target_proto_addincls(cpp-langmask-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(cpp-langmask-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/library/cpp/langmask/proto/CMakeLists.linux-aarch64.txt b/library/cpp/langmask/proto/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..61f975983e --- /dev/null +++ b/library/cpp/langmask/proto/CMakeLists.linux-aarch64.txt @@ -0,0 +1,44 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(cpp-langmask-proto) +target_link_libraries(cpp-langmask-proto PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(cpp-langmask-proto PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/langmask/proto/langmask.proto +) +target_proto_addincls(cpp-langmask-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(cpp-langmask-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/library/cpp/langmask/proto/CMakeLists.linux-x86_64.txt b/library/cpp/langmask/proto/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..61f975983e --- /dev/null +++ b/library/cpp/langmask/proto/CMakeLists.linux-x86_64.txt @@ -0,0 +1,44 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(cpp-langmask-proto) +target_link_libraries(cpp-langmask-proto PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(cpp-langmask-proto PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/langmask/proto/langmask.proto +) +target_proto_addincls(cpp-langmask-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(cpp-langmask-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/library/cpp/langmask/proto/CMakeLists.txt b/library/cpp/langmask/proto/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/langmask/proto/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/langmask/proto/CMakeLists.windows-x86_64.txt b/library/cpp/langmask/proto/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..e9f692d0f2 --- /dev/null +++ b/library/cpp/langmask/proto/CMakeLists.windows-x86_64.txt @@ -0,0 +1,43 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(cpp-langmask-proto) +target_link_libraries(cpp-langmask-proto PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(cpp-langmask-proto PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/langmask/proto/langmask.proto +) +target_proto_addincls(cpp-langmask-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(cpp-langmask-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/library/cpp/langmask/proto/langmask.proto b/library/cpp/langmask/proto/langmask.proto new file mode 100644 index 0000000000..be23ecfbba --- /dev/null +++ b/library/cpp/langmask/proto/langmask.proto @@ -0,0 +1,6 @@ +package NProto; + +message TLangMask { + repeated uint32 Bits = 1; // binary + optional string Names = 2; // human readable +} diff --git a/library/cpp/langmask/proto/ya.make b/library/cpp/langmask/proto/ya.make new file mode 100644 index 0000000000..823a0ad261 --- /dev/null +++ b/library/cpp/langmask/proto/ya.make @@ -0,0 +1,11 @@ +PROTO_LIBRARY() + +SRCS( + langmask.proto +) + +IF (NOT PY_PROTOS_FOR) + EXCLUDE_TAGS(GO_PROTO) +ENDIF() + +END() diff --git a/library/cpp/microbdb/CMakeLists.darwin-x86_64.txt b/library/cpp/microbdb/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..c4d2e9d3a4 --- /dev/null +++ b/library/cpp/microbdb/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,56 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +find_package(ZLIB REQUIRED) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(library-cpp-microbdb) +target_link_libraries(library-cpp-microbdb PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-fastlz + contrib-libs-libc_compat + contrib-libs-protobuf + contrib-libs-snappy + ZLIB::ZLIB + cpp-deprecated-fgood + cpp-on_disk-st_hash + library-cpp-packedtypes +) +target_proto_messages(library-cpp-microbdb PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/noextinfo.proto +) +target_sources(library-cpp-microbdb PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/file.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/header.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/microbdb.cpp +) +target_proto_addincls(library-cpp-microbdb + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(library-cpp-microbdb + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/library/cpp/microbdb/CMakeLists.linux-aarch64.txt b/library/cpp/microbdb/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..302dbd03cd --- /dev/null +++ b/library/cpp/microbdb/CMakeLists.linux-aarch64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +find_package(ZLIB REQUIRED) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(library-cpp-microbdb) +target_link_libraries(library-cpp-microbdb PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-fastlz + contrib-libs-libc_compat + contrib-libs-protobuf + contrib-libs-snappy + ZLIB::ZLIB + cpp-deprecated-fgood + cpp-on_disk-st_hash + library-cpp-packedtypes +) +target_proto_messages(library-cpp-microbdb PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/noextinfo.proto +) +target_sources(library-cpp-microbdb PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/file.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/header.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/microbdb.cpp +) +target_proto_addincls(library-cpp-microbdb + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(library-cpp-microbdb + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/library/cpp/microbdb/CMakeLists.linux-x86_64.txt b/library/cpp/microbdb/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..302dbd03cd --- /dev/null +++ b/library/cpp/microbdb/CMakeLists.linux-x86_64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +find_package(ZLIB REQUIRED) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(library-cpp-microbdb) +target_link_libraries(library-cpp-microbdb PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-fastlz + contrib-libs-libc_compat + contrib-libs-protobuf + contrib-libs-snappy + ZLIB::ZLIB + cpp-deprecated-fgood + cpp-on_disk-st_hash + library-cpp-packedtypes +) +target_proto_messages(library-cpp-microbdb PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/noextinfo.proto +) +target_sources(library-cpp-microbdb PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/file.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/header.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/microbdb.cpp +) +target_proto_addincls(library-cpp-microbdb + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(library-cpp-microbdb + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/library/cpp/microbdb/CMakeLists.txt b/library/cpp/microbdb/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/microbdb/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/microbdb/CMakeLists.windows-x86_64.txt b/library/cpp/microbdb/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..c4d2e9d3a4 --- /dev/null +++ b/library/cpp/microbdb/CMakeLists.windows-x86_64.txt @@ -0,0 +1,56 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +find_package(ZLIB REQUIRED) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(library-cpp-microbdb) +target_link_libraries(library-cpp-microbdb PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-fastlz + contrib-libs-libc_compat + contrib-libs-protobuf + contrib-libs-snappy + ZLIB::ZLIB + cpp-deprecated-fgood + cpp-on_disk-st_hash + library-cpp-packedtypes +) +target_proto_messages(library-cpp-microbdb PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/noextinfo.proto +) +target_sources(library-cpp-microbdb PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/file.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/header.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/microbdb/microbdb.cpp +) +target_proto_addincls(library-cpp-microbdb + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(library-cpp-microbdb + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/library/cpp/microbdb/align.h b/library/cpp/microbdb/align.h new file mode 100644 index 0000000000..2f8567f134 --- /dev/null +++ b/library/cpp/microbdb/align.h @@ -0,0 +1,17 @@ +#pragma once + +#include <util/system/defaults.h> + +using TDatAlign = int; + +static inline size_t DatFloor(size_t size) { + return (size - 1) & ~(sizeof(TDatAlign) - 1); +} + +static inline size_t DatCeil(size_t size) { + return DatFloor(size) + sizeof(TDatAlign); +} + +static inline void DatSet(void* ptr, size_t size) { + *(TDatAlign*)((char*)ptr + DatFloor(size)) = 0; +} diff --git a/library/cpp/microbdb/compressed.h b/library/cpp/microbdb/compressed.h new file mode 100644 index 0000000000..f0c9edfa92 --- /dev/null +++ 
b/library/cpp/microbdb/compressed.h @@ -0,0 +1,520 @@ +#pragma once + +#include <util/stream/zlib.h> + +#include "microbdb.h" +#include "safeopen.h" + +class TCompressedInputFileManip: public TInputFileManip { +public: + inline i64 GetLength() const { + return -1; // Some microbdb logic relies on the size of compressed files being unknown + } + + inline i64 Seek(i64 offset, int whence) { // Skips forward by reading and discarding bytes from InputStream; a target behind the current position first rewinds the file to offset 0 + i64 oldPos = DoGetPosition(); + i64 newPos = offset; + switch (whence) { + case SEEK_CUR: + newPos += oldPos; + [[fallthrough]]; // Keeps the compiler happy. Please fix it! + case SEEK_SET: + break; + default: + return -1L; + } + if (oldPos > newPos) { + VerifyRandomAccess(); + DoSeek(0, SEEK_SET, IsStreamOpen()); + oldPos = 0; + } + const size_t bufsize = 1 << 12; + char buf[bufsize]; + for (i64 i = oldPos; i < newPos; i += bufsize) + InputStream->Read(buf, (i + (i64)bufsize < newPos) ? bufsize : (size_t)(newPos - i)); + return newPos; + } + + i64 RealSeek(i64 offset, int whence) { // Destroys InputStream, seeks the file itself, and creates a new decompressing stream only if the seek did not return -1 + InputStream.Destroy(); + i64 ret = DoSeek(offset, whence, !!CompressedInput); + if (ret != -1) + DoStreamOpen(DoCreateStream(), true); + return ret; + } + +protected: + IInputStream* CreateStream(const TFile& file) override { + CompressedInput.Reset(new TUnbufferedFileInput(file)); + return DoCreateStream(); + } + inline IInputStream* DoCreateStream() { + return new TZLibDecompress(CompressedInput.Get(), ZLib::GZip); + //return new TLzqDecompress(CompressedInput.Get()); + } + THolder<IInputStream> CompressedInput; +}; + +class TCompressedBufferedInputFileManip: public TCompressedInputFileManip { +protected: + IInputStream* CreateStream(const TFile& file) override { + CompressedInput.Reset(new TFileInput(file, 0x100000)); + return DoCreateStream(); + } +}; + +using TCompressedInputPageFile = TInputPageFileImpl<TCompressedInputFileManip>; +using TCompressedBufferedInputPageFile = TInputPageFileImpl<TCompressedBufferedInputFileManip>; + +template <class TVal> +struct TGzKey { + ui64 Offset; + TVal Key; + + static const ui32
RecordSig = TVal::RecordSig + 0x50495a47; + + TGzKey() { + } + + TGzKey(ui64 offset, const TVal& key) + : Offset(offset) + , Key(key) + { + } + + size_t SizeOf() const { + if (this) + return sizeof(Offset) + ::SizeOf(&Key); + else { + size_t sizeOfKey = ::SizeOf((TVal*)NULL); + return sizeOfKey ? (sizeof(Offset) + sizeOfKey) : 0; + } + } +}; + +template <class TVal> +class TInZIndexFile: protected TInDatFileImpl<TGzKey<TVal>> { + typedef TInDatFileImpl<TGzKey<TVal>> TDatFile; + typedef TGzKey<TVal> TGzVal; + typedef typename TDatFile::TRecIter TRecIter; + typedef typename TRecIter::TPageIter TPageIter; + +public: + TInZIndexFile() + : Index0(nullptr) + { + } + + int Open(const char* fname, size_t pages = 1, int pagesOrBytes = 1, ui32* gotRecordSig = nullptr) { + int ret = TDatFile::Open(fname, pages, pagesOrBytes, gotRecordSig); + if (ret) + return ret; + if (!(Index0 = (TDatPage*)malloc(TPageIter::GetPageSize()))) { + TDatFile::Close(); + return MBDB_NO_MEMORY; + } + if (SizeOf((TGzVal*)NULL)) + RecsOnPage = (TPageIter::GetPageSize() - sizeof(TDatPage)) / DatCeil(SizeOf((TGzVal*)NULL)); + TDatFile::Next(); + memcpy(Index0, TPageIter::Current(), TPageIter::GetPageSize()); + return 0; + } + + int Close() { + free(Index0); + Index0 = NULL; + return TDatFile::Close(); + } + + inline int GetError() const { + return TDatFile::GetError(); + } + + int FindKey(const TVal* akey, const typename TExtInfoType<TVal>::TResult* = NULL) { + assert(IsOpen()); + if (!SizeOf((TVal*)NULL)) + return FindVszKey(akey); + int pageno; + i64 offset; + FindKeyOnPage(pageno, offset, Index0, akey); + TDatPage* page = TPageIter::GotoPage(pageno + 1); + int num_add = (int)offset; + FindKeyOnPage(pageno, offset, page, akey); + return pageno + num_add; + } + + using TDatFile::IsOpen; + + int FindVszKey(const TVal* akey, const typename TExtInfoType<TVal>::TResult* = NULL) { + int pageno; + i64 offset; + FindVszKeyOnPage(pageno, offset, Index0, akey); + TDatPage* page = TPageIter::GotoPage(pageno + 
1); + int num_add = (int)offset; + FindVszKeyOnPage(pageno, offset, page, akey); + return pageno + num_add; + } + + i64 FindPage(int pageno) { + if (!SizeOf((TVal*)NULL)) + return FindVszPage(pageno); + int recsize = DatCeil(SizeOf((TGzVal*)NULL)); + TDatPage* page = TPageIter::GotoPage(1 + pageno / RecsOnPage); + if (!page) // can happen if pageno is beyond EOF + return -1; + unsigned int localpageno = pageno % RecsOnPage; + if (localpageno >= page->RecNum) // can happen if pageno is beyond EOF + return -1; + TGzVal* v = (TGzVal*)((char*)page + sizeof(TDatPage) + localpageno * recsize); + return v->Offset; + } + + i64 FindVszPage(int pageno) { + TGzVal* cur = (TGzVal*)((char*)Index0 + sizeof(TDatPage)); + TGzVal* prev = cur; + unsigned int n = 0; + while (n < Index0->RecNum && cur->Offset <= (unsigned int)pageno) { + prev = cur; + cur = (TGzVal*)((char*)cur + DatCeil(SizeOf(cur))); + n++; + } + TDatPage* page = TPageIter::GotoPage(n); + unsigned int num_add = (unsigned int)(prev->Offset); + n = 0; + cur = (TGzVal*)((char*)page + sizeof(TDatPage)); + while (n < page->RecNum && n + num_add < (unsigned int)pageno) { + cur = (TGzVal*)((char*)cur + DatCeil(SizeOf(cur))); + n++; + } + if (n == page->RecNum) // can happen if pageno is beyond EOF + return -1; + return cur->Offset; + } + +protected: + void FindKeyOnPage(int& pageno, i64& offset, TDatPage* page, const TVal* Key) { + int left = 0; + int right = page->RecNum - 1; + int recsize = DatCeil(SizeOf((TGzVal*)NULL)); + while (left < right) { + int middle = (left + right) >> 1; + if (((TGzVal*)((char*)page + sizeof(TDatPage) + middle * recsize))->Key < *Key) + left = middle + 1; + else + right = middle; + } + //borders check (left and right) + pageno = (left == 0 || ((TGzVal*)((char*)page + sizeof(TDatPage) + left * recsize))->Key < *Key) ? 
left : left - 1; + offset = ((TGzVal*)((char*)page + sizeof(TDatPage) + pageno * recsize))->Offset; + } + + void FindVszKeyOnPage(int& pageno, i64& offset, TDatPage* page, const TVal* key) { + TGzVal* cur = (TGzVal*)((char*)page + sizeof(TDatPage)); + ui32 RecordSig = page->RecNum; + i64 tmpoffset = cur->Offset; + for (; RecordSig > 0 && cur->Key < *key; --RecordSig) { + tmpoffset = cur->Offset; + cur = (TGzVal*)((char*)cur + DatCeil(SizeOf(cur))); + } + int idx = page->RecNum - RecordSig - 1; + pageno = (idx >= 0) ? idx : 0; + offset = tmpoffset; + } + + TDatPage* Index0; + int RecsOnPage; +}; + +template <class TKey> +class TCompressedIndexedInputPageFile: public TCompressedInputPageFile { +public: + int GotoPage(int pageno); + +protected: + TInZIndexFile<TKey> KeyFile; +}; + +template <class TVal, class TKey> +class TDirectCompressedInDatFile: public TDirectInDatFile<TVal, TKey, + TInDatFileImpl<TVal, TInputRecordIterator<TVal, + TInputPageIterator<TCompressedIndexedInputPageFile<TKey>>>>> { +}; + +class TCompressedOutputFileManip: public TOutputFileManip { +public: + inline i64 GetLength() const { + return -1; // Some microbdb logic relies on the size of compressed files being unknown + } + + inline i64 Seek(i64 offset, int whence) { // Forward-only: a target behind the current position is rejected with -1; the gap up to newPos is filled by writing zero bytes + i64 oldPos = DoGetPosition(); + i64 newPos = offset; + switch (whence) { + case SEEK_CUR: + newPos += oldPos; + [[fallthrough]]; // Keeps the compiler happy. Please fix it! + case SEEK_SET: + break; + default: + return -1L; + } + if (oldPos > newPos) + return -1L; + + const size_t bufsize = 1 << 12; + char buf[bufsize] = {0}; + for (i64 i = oldPos; i < newPos; i += bufsize) + OutputStream->Write(buf, (i + (i64)bufsize < newPos) ?
bufsize : (size_t)(newPos - i)); + return newPos; + } + + i64 RealSeek(i64 offset, int whence) { + OutputStream.Destroy(); + i64 ret = DoSeek(offset, whence, !!CompressedOutput); + if (ret != -1) + DoStreamOpen(DoCreateStream(), true); + return ret; + } + +protected: + IOutputStream* CreateStream(const TFile& file) override { + CompressedOutput.Reset(new TUnbufferedFileOutput(file)); + return DoCreateStream(); + } + inline IOutputStream* DoCreateStream() { + return new TZLibCompress(CompressedOutput.Get(), ZLib::GZip, 1); + } + THolder<IOutputStream> CompressedOutput; +}; + +class TCompressedBufferedOutputFileManip: public TCompressedOutputFileManip { +protected: + IOutputStream* CreateStream(const TFile& file) override { + CompressedOutput.Reset(new TUnbufferedFileOutput(file)); + return DoCreateStream(); + } + inline IOutputStream* DoCreateStream() { + return new TZLibCompress(CompressedOutput.Get(), ZLib::GZip, 1, 0x100000); + } +}; + +using TCompressedOutputPageFile = TOutputPageFileImpl<TCompressedOutputFileManip>; +using TCompressedBufferedOutputPageFile = TOutputPageFileImpl<TCompressedBufferedOutputFileManip>; + +template <class TVal> +class TOutZIndexFile: public TOutDatFileImpl< + TGzKey<TVal>, + TOutputRecordIterator<TGzKey<TVal>, TOutputPageIterator<TOutputPageFile>, TCallbackIndexer>> { + typedef TOutDatFileImpl< + TGzKey<TVal>, + TOutputRecordIterator<TGzKey<TVal>, TOutputPageIterator<TOutputPageFile>, TCallbackIndexer>> + TDatFile; + typedef TOutZIndexFile<TVal> TMyType; + typedef TGzKey<TVal> TGzVal; + typedef typename TDatFile::TRecIter TRecIter; + typedef typename TRecIter::TPageIter TPageIter; + typedef typename TRecIter::TIndexer TIndexer; + +public: + TOutZIndexFile() { + TotalRecNum = 0; + TIndexer::SetCallback(this, DispatchCallback); + } + + int Open(const char* fname, size_t pagesize, size_t pages, int pagesOrBytes = 1) { + int ret = TDatFile::Open(fname, pagesize, pages, pagesOrBytes); + if (ret) + return ret; + if ((ret = 
TRecIter::GotoPage(1))) + TDatFile::Close(); + return ret; + } + + int Close() { + TPageIter::Unfreeze(); + if (TRecIter::RecNum) + NextPage(TPageIter::Current()); + int ret = 0; + if (Index0.size() && !(ret = TRecIter::GotoPage(0))) { + typename std::vector<TGzVal>::iterator it, end = Index0.end(); + for (it = Index0.begin(); it != end; ++it) + TRecIter::Push(&*it); + ret = (TPageIter::GetPageNum() != 0) ? MBDB_PAGE_OVERFLOW : TPageIter::GetError(); + } + Index0.clear(); + int ret1 = TDatFile::Close(); + return ret ? ret : ret1; + } + +protected: + int TotalRecNum; // should be enough because we have GotoPage(int) + std::vector<TGzVal> Index0; + + void NextPage(const TDatPage* page) { + TGzVal* rec = (TGzVal*)((char*)page + sizeof(TDatPage)); + Index0.push_back(TGzVal(TotalRecNum, rec->Key)); + TotalRecNum += TRecIter::RecNum; + } + + static void DispatchCallback(void* This, const TDatPage* page) { + ((TMyType*)This)->NextPage(page); + } +}; + +template <class TVal, class TKey, class TPageFile = TCompressedOutputPageFile> +class TOutDirectCompressedFileImpl: public TOutDatFileImpl< + TVal, + TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TCallbackIndexer>> { + typedef TOutDatFileImpl< + TVal, + TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TCallbackIndexer>> + TDatFile; + typedef TOutDirectCompressedFileImpl<TVal, TKey, TPageFile> TMyType; + typedef typename TDatFile::TRecIter TRecIter; + typedef typename TRecIter::TPageIter TPageIter; + typedef typename TRecIter::TIndexer TIndexer; + typedef TGzKey<TKey> TMyKey; + typedef TOutZIndexFile<TKey> TKeyFile; + +protected: + using TDatFile::Tell; + +public: + TOutDirectCompressedFileImpl() { + TIndexer::SetCallback(this, DispatchCallback); + } + + int Open(const char* fname, size_t pagesize, size_t ipagesize = 0) { + char iname[FILENAME_MAX]; + int ret; + if (ipagesize == 0) + ipagesize = pagesize; + + ret = TDatFile::Open(fname, pagesize, 1, 1); + ret = ret ? 
ret : DatNameToIdx(iname, fname); + ret = ret ? ret : KeyFile.Open(iname, ipagesize, 1, 1); + if (ret) + TDatFile::Close(); + return ret; + } + + int Close() { + if (TRecIter::RecNum) + NextPage(TPageIter::Current()); + int ret = KeyFile.Close(); + int ret1 = TDatFile::Close(); + return ret1 ? ret1 : ret; + } + + int GetError() const { + return TDatFile::GetError() ? TDatFile::GetError() : KeyFile.GetError(); + } + +protected: + TKeyFile KeyFile; + + void NextPage(const TDatPage* page) { + size_t sz = SizeOf((TMyKey*)NULL); + TMyKey* rec = KeyFile.Reserve(sz ? sz : MaxSizeOf<TMyKey>()); + if (rec) { + rec->Offset = Tell(); + rec->Key = *(TVal*)((char*)page + sizeof(TDatPage)); + KeyFile.ResetDat(); + } + } + + static void DispatchCallback(void* This, const TDatPage* page) { + ((TMyType*)This)->NextPage(page); + } +}; + +template <class TKey> +int TCompressedIndexedInputPageFile<TKey>::GotoPage(int pageno) { + if (Error) + return Error; + + Eof = 0; + + i64 offset = KeyFile.FindPage(pageno); + if (offset <= 0) // FindPage() reports a page beyond EOF as -1; 0 is rejected as before. Without this, -1 reached RealSeek() and a -1 result compared equal to offset, so no error was recorded + return Error = MBDB_BAD_FILE_SIZE; + + if (offset != FileManip.RealSeek(offset, SEEK_SET)) + Error = MBDB_BAD_FILE_SIZE; + + return Error; +} + +template <typename TVal> +class TCompressedInDatFile: public TInDatFile<TVal, TCompressedInputPageFile> { +public: + TCompressedInDatFile(const char* name, size_t pages, int pagesOrBytes = 1) + : TInDatFile<TVal, TCompressedInputPageFile>(name, pages, pagesOrBytes) + { + } +}; + +template <typename TVal> +class TCompressedOutDatFile: public TOutDatFile<TVal, TFakeCompression, TCompressedOutputPageFile> { +public: + TCompressedOutDatFile(const char* name, size_t pagesize, size_t pages, int pagesOrBytes = 1) + : TOutDatFile<TVal, TFakeCompression, TCompressedOutputPageFile>(name, pagesize, pages, pagesOrBytes) + { + } +}; + +template <typename TVal, typename TKey, typename TPageFile = TCompressedOutputPageFile> +class TOutDirectCompressedFile: protected TOutDirectCompressedFileImpl<TVal, TKey, TPageFile> { + typedef
TOutDirectCompressedFileImpl<TVal, TKey, TPageFile> TBase; + +public: + TOutDirectCompressedFile(const char* name, size_t pagesize, size_t ipagesize = 0) + : Name(strdup(name)) + , PageSize(pagesize) + , IdxPageSize(ipagesize) + { + } + + ~TOutDirectCompressedFile() { + Close(); + free(Name); + Name = NULL; + } + + void Open(const char* fname) { + int ret = TBase::Open(fname, PageSize, IdxPageSize); + if (ret) + ythrow yexception() << ErrorMessage(ret, "Failed to open output file", fname); + free(Name); + Name = strdup(fname); + } + + void Close() { + int ret; + if ((ret = TBase::GetError())) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret, "Error before closing output file", Name); + if ((ret = TBase::Close())) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret, "Error while closing output file", Name); + } + + const char* GetName() const { + return Name; + } + + using TBase::Freeze; + using TBase::Push; + using TBase::Reserve; + using TBase::Unfreeze; + +protected: + char* Name; + size_t PageSize, IdxPageSize; +}; + +class TCompressedInterFileTypes { +public: + typedef TCompressedBufferedOutputPageFile TOutPageFile; + typedef TCompressedBufferedInputPageFile TInPageFile; +}; diff --git a/library/cpp/microbdb/extinfo.h b/library/cpp/microbdb/extinfo.h new file mode 100644 index 0000000000..c8389e783c --- /dev/null +++ b/library/cpp/microbdb/extinfo.h @@ -0,0 +1,127 @@ +#pragma once + +#include "header.h" + +#include <library/cpp/packedtypes/longs.h> + +#include <util/generic/typetraits.h> + +#include <library/cpp/microbdb/noextinfo.pb.h> + +inline bool operator<(const TNoExtInfo&, const TNoExtInfo&) { + return false; +} + +namespace NMicroBDB { + Y_HAS_MEMBER(TExtInfo); + + template <class, bool> + struct TSelectExtInfo; + + template <class T> + struct TSelectExtInfo<T, false> { + typedef TNoExtInfo TExtInfo; + }; + + template <class T> + struct TSelectExtInfo<T, true> { + typedef typename T::TExtInfo TExtInfo; 
+ }; + + template <class T> + class TExtInfoType { + public: + static const bool Exists = THasTExtInfo<T>::value; + typedef typename TSelectExtInfo<T, Exists>::TExtInfo TResult; + }; + + Y_HAS_MEMBER(MakeExtKey); + + template <class, class, bool> + struct TSelectMakeExtKey; + + template <class TVal, class TKey> + struct TSelectMakeExtKey<TVal, TKey, false> { + static inline void Make(TKey* to, typename TExtInfoType<TKey>::TResult*, const TVal* from, const typename TExtInfoType<TVal>::TResult*) { + *to = *from; + } + }; + + template <class TVal, class TKey> + struct TSelectMakeExtKey<TVal, TKey, true> { + static inline void Make(TKey* to, typename TExtInfoType<TKey>::TResult* toExt, const TVal* from, const typename TExtInfoType<TVal>::TResult* fromExt) { + TVal::MakeExtKey(to, toExt, from, fromExt); + } + }; + + template <typename T> + inline size_t SizeOfExt(const T* rec, size_t* /*out*/ extLenSize = nullptr, size_t* /*out*/ extSize = nullptr) { + if (!TExtInfoType<T>::Exists) { + if (extLenSize) + *extLenSize = 0; + if (extSize) + *extSize = 0; + return SizeOf(rec); + } else { + size_t sz = SizeOf(rec); + i64 l; + int els = in_long(l, (const char*)rec + sz); + if (extLenSize) + *extLenSize = static_cast<size_t>(els); + if (extSize) + *extSize = static_cast<size_t>(l); + return sz; + } + } + + template <class T> + bool GetExtInfo(const T* rec, typename TExtInfoType<T>::TResult* extInfo) { + Y_VERIFY(TExtInfoType<T>::Exists, "GetExtInfo should only be used with extended records"); + if (!rec) + return false; + size_t els; + size_t es; + size_t s = SizeOfExt(rec, &els, &es); + const ui8* raw = (const ui8*)rec + s + els; + return extInfo->ParseFromArray(raw, es); + } + + template <class T> + const ui8* GetExtInfoRaw(const T* rec, size_t* len) { + Y_VERIFY(TExtInfoType<T>::Exists, "GetExtInfo should only be used with extended records"); + if (!rec) { + *len = 0; + return nullptr; + } + size_t els; + size_t es; + size_t s = SizeOfExt(rec, &els, &es); + *len = els + es; 
+ return (const ui8*)rec + s; + } + + // Compares the serialized extInfo of two records (e.g. for stable sort): different payload sizes order by size, equal sizes by memcmp of the payload bytes, which start after the record body and the length prefix (the same layout GetExtInfo reads) + template <class T> + int CompareExtInfo(const T* a, const T* b) { + Y_VERIFY(TExtInfoType<T>::Exists, "CompareExtInfo should only be used with extended records"); + size_t elsA, esA; + size_t elsB, esB; + const size_t szA = SizeOfExt(a, &elsA, &esA); + const size_t szB = SizeOfExt(b, &elsB, &esB); // was SizeOfExt(a, ...): the sizes of b were never read + if (esA != esB) + return esA - esB; + else + return memcmp((const ui8*)a + szA + elsA, (const ui8*)b + szB + elsB, esA); + } + +} + +using NMicroBDB::TExtInfoType; + +template <class TVal, class TKey> +struct TMakeExtKey { + static const bool Exists = NMicroBDB::THasMakeExtKey<TVal>::value; + static inline void Make(TKey* to, typename TExtInfoType<TKey>::TResult* toExt, const TVal* from, const typename TExtInfoType<TVal>::TResult* fromExt) { + NMicroBDB::TSelectMakeExtKey<TVal, TKey, Exists>::Make(to, toExt, from, fromExt); + } +}; diff --git a/library/cpp/microbdb/file.cpp b/library/cpp/microbdb/file.cpp new file mode 100644 index 0000000000..599a7301a0 --- /dev/null +++ b/library/cpp/microbdb/file.cpp @@ -0,0 +1,220 @@ +#include "file.h" + +#include <fcntl.h> +#include <errno.h> +#include <sys/stat.h> + +#ifdef _win32_ +#define S_ISREG(x) !!(x & S_IFREG) +#endif + +TFileManipBase::TFileManipBase() + : FileBased(true) +{ +} + +i64 TFileManipBase::DoSeek(i64 offset, int whence, bool isStreamOpen) { + if (!isStreamOpen) + return -1; + VerifyRandomAccess(); + return File.Seek(offset, (SeekDir)whence); +} + +int TFileManipBase::DoFileOpen(const TFile& file) { + File = file; + SetFileBased(IsFileBased()); + return (File.IsOpen()) ?
0 : MBDB_OPEN_ERROR; +} + +int TFileManipBase::DoFileClose() { + if (File.IsOpen()) { + File.Close(); + return MBDB_ALREADY_INITIALIZED; + } + return 0; +} + +int TFileManipBase::IsFileBased() const { + bool fileBased = true; +#if defined(_win_) +#elif defined(_unix_) + FHANDLE h = File.GetHandle(); + struct stat sb; + fileBased = false; + if (h != INVALID_FHANDLE && !::fstat(h, &sb) && S_ISREG(sb.st_mode)) { + fileBased = true; + } +#else +#error +#endif + return fileBased; +} + +TInputFileManip::TInputFileManip() + : InputStream(nullptr) +{ +} + +int TInputFileManip::Open(const char* fname, bool direct) { + int ret; + return (ret = DoClose()) ? ret : DoStreamOpen(TFile(fname, RdOnly | (direct ? DirectAligned : EOpenMode()))); +} + +int TInputFileManip::Open(IInputStream& input) { + int ret; + return (ret = DoClose()) ? ret : DoStreamOpen(&input); +} + +int TInputFileManip::Open(TAutoPtr<IInputStream> input) { + int ret; + return (ret = DoClose()) ? ret : DoStreamOpen(input.Release()); +} + +int TInputFileManip::Init(const TFile& file) { + int ret; + if ((ret = DoClose())) // extra parentheses mark the assignment as intentional + return ret; + ret = DoStreamOpen(file); // propagate the open result (e.g. MBDB_OPEN_ERROR from DoFileOpen) like the Open() overloads do, instead of always reporting success + return ret; +} + +int TInputFileManip::Close() { + DoClose(); + return 0; +} + +ssize_t TInputFileManip::Read(void* buf, unsigned len) { + if (!IsStreamOpen()) + return -1; + return InputStream->Load(buf, len); +} + +IInputStream* TInputFileManip::CreateStream(const TFile& file) { + return new TUnbufferedFileInput(file); +} + +TMappedInputPageFile::TMappedInputPageFile() + : Pagesize(0) + , Error(0) + , Pagenum(0) + , Recordsig(0) + , Open(false) +{ + Term(); +} + +TMappedInputPageFile::~TMappedInputPageFile() { + Term(); +} + +int TMappedInputPageFile::Init(const char* fname, ui32 recsig, ui32* gotRecordSig, bool) { + Mappedfile.init(fname); + Open = true; + + TDatMetaPage* meta = (TDatMetaPage*)Mappedfile.getData(); + if (gotRecordSig) + *gotRecordSig = meta->RecordSig; + + if (meta->MetaSig != METASIG) + Error = MBDB_BAD_METAPAGE; + else if (meta->RecordSig
!= recsig) + Error = MBDB_BAD_RECORDSIG; + + if (Error) { + Mappedfile.term(); + return Error; + } + + size_t fsize = Mappedfile.getSize(); + if (fsize < METASIZE) + return Error = MBDB_BAD_FILE_SIZE; + fsize -= METASIZE; + if (fsize % meta->PageSize) + return Error = MBDB_BAD_FILE_SIZE; + Pagenum = (int)(fsize / meta->PageSize); + Pagesize = meta->PageSize; + Recordsig = meta->RecordSig; + Error = 0; + return Error; +} + +int TMappedInputPageFile::Term() { + Mappedfile.term(); + Open = false; + return 0; +} + +TOutputFileManip::TOutputFileManip() + : OutputStream(nullptr) +{ +} + +int TOutputFileManip::Open(const char* fname, EOpenMode mode) { + if (IsStreamOpen()) { + return MBDB_ALREADY_INITIALIZED; // should it be closed as TInputFileManip + } + + try { + if (unlink(fname) && errno != ENOENT) { + if (strncmp(fname, "/dev/std", 8)) + return MBDB_OPEN_ERROR; + } + TFile file(fname, mode); + DoStreamOpen(file); + } catch (const TFileError&) { + return MBDB_OPEN_ERROR; + } + return 0; +} + +int TOutputFileManip::Open(IOutputStream& output) { + if (IsStreamOpen()) + return MBDB_ALREADY_INITIALIZED; + DoStreamOpen(&output); + return 0; +} + +int TOutputFileManip::Open(TAutoPtr<IOutputStream> output) { + if (IsStreamOpen()) + return MBDB_ALREADY_INITIALIZED; + DoStreamOpen(output.Release()); + return 0; +} + +int TOutputFileManip::Init(const TFile& file) { + if (IsStreamOpen()) + return MBDB_ALREADY_INITIALIZED; // should it be closed as TInputFileManip + DoStreamOpen(file); + return 0; +} + +int TOutputFileManip::Rotate(const char* newfname) { + if (!IsStreamOpen()) { + return MBDB_NOT_INITIALIZED; + } + + try { + TFile file(newfname, WrOnly | OpenAlways | TruncExisting | ARW | AWOther); + DoClose(); + DoStreamOpen(file); + } catch (const TFileError&) { + return MBDB_OPEN_ERROR; + } + return 0; +} + +int TOutputFileManip::Close() { + DoClose(); + return 0; +} + +int TOutputFileManip::Write(const void* buf, unsigned len) { + if (!IsStreamOpen()) + return -1; + 
OutputStream->Write(buf, len); + return len; +} + +IOutputStream* TOutputFileManip::CreateStream(const TFile& file) { + return new TUnbufferedFileOutput(file); +} diff --git a/library/cpp/microbdb/file.h b/library/cpp/microbdb/file.h new file mode 100644 index 0000000000..f7c7818375 --- /dev/null +++ b/library/cpp/microbdb/file.h @@ -0,0 +1,225 @@ +#pragma once + +#include "header.h" + +#include <library/cpp/deprecated/mapped_file/mapped_file.h> + +#include <util/generic/noncopyable.h> +#include <util/stream/file.h> +#include <util/system/filemap.h> + +#define FS_BLOCK_SIZE 512 + +class TFileManipBase { +protected: + TFileManipBase(); + + virtual ~TFileManipBase() { + } + + i64 DoSeek(i64 offset, int whence, bool isStreamOpen); + + int DoFileOpen(const TFile& file); + + int DoFileClose(); + + int IsFileBased() const; + + inline void SetFileBased(bool fileBased) { + FileBased = fileBased; + } + + inline i64 DoGetPosition() const { + Y_ASSERT(FileBased); + return File.GetPosition(); + } + + inline i64 DoGetLength() const { + return (FileBased) ? 
File.GetLength() : -1; + } + + inline void VerifyRandomAccess() const { + Y_VERIFY(FileBased, "non-file stream can not be accessed randomly"); + } + + inline i64 GetPosition() const { + return (i64)File.GetPosition(); + } + +private: + TFile File; + bool FileBased; +}; + +class TInputFileManip: public TFileManipBase { +public: + using TFileManipBase::GetPosition; + + TInputFileManip(); + + int Open(const char* fname, bool direct = false); + + int Open(IInputStream& input); + + int Open(TAutoPtr<IInputStream> input); + + int Init(const TFile& file); + + int Close(); + + ssize_t Read(void* buf, unsigned len); + + inline bool IsOpen() const { + return IsStreamOpen(); + } + + inline i64 GetLength() const { + return DoGetLength(); + } + + inline i64 Seek(i64 offset, int whence) { + return DoSeek(offset, whence, IsStreamOpen()); + } + + inline i64 RealSeek(i64 offset, int whence) { + return Seek(offset, whence); + } + +protected: + inline bool IsStreamOpen() const { + return !!InputStream; + } + + inline int DoStreamOpen(IInputStream* input, bool fileBased = false) { + InputStream.Reset(input); + SetFileBased(fileBased); + return 0; + } + + inline int DoStreamOpen(const TFile& file) { + int ret; + return (ret = DoFileOpen(file)) ? ret : DoStreamOpen(CreateStream(file), IsFileBased()); + } + + virtual IInputStream* CreateStream(const TFile& file); + + inline bool DoClose() { + if (IsStreamOpen()) { + InputStream.Destroy(); + return DoFileClose(); + } + return 0; + } + + THolder<IInputStream> InputStream; +}; + +class TMappedInputPageFile: private TNonCopyable { +public: + TMappedInputPageFile(); + + ~TMappedInputPageFile(); + + inline int GetError() const { + return Error; + } + + inline size_t GetPageSize() const { + return Pagesize; + } + + inline int GetLastPage() const { + return Pagenum; + } + + inline ui32 GetRecordSig() const { + return Recordsig; + } + + inline bool IsOpen() const { + return Open; + } + + inline char* GetData() const { + return Open ? 
(char*)Mappedfile.getData() : nullptr; + } + + inline size_t GetSize() const { + return Open ? Mappedfile.getSize() : 0; + } + +protected: + int Init(const char* fname, ui32 recsig, ui32* gotRecordSig = nullptr, bool direct = false); + + int Term(); + + TMappedFile Mappedfile; + size_t Pagesize; + int Error; + int Pagenum; + ui32 Recordsig; + bool Open; +}; + +class TOutputFileManip: public TFileManipBase { +public: + TOutputFileManip(); + + int Open(const char* fname, EOpenMode mode = WrOnly | CreateAlways | ARW | AWOther); + + int Open(IOutputStream& output); + + int Open(TAutoPtr<IOutputStream> output); + + int Init(const TFile& file); + + int Rotate(const char* newfname); + + int Write(const void* buf, unsigned len); + + int Close(); + + inline bool IsOpen() const { + return IsStreamOpen(); + } + + inline i64 GetLength() const { + return DoGetLength(); + } + + inline i64 Seek(i64 offset, int whence) { + return DoSeek(offset, whence, IsStreamOpen()); + } + + inline i64 RealSeek(i64 offset, int whence) { + return Seek(offset, whence); + } + +protected: + inline bool IsStreamOpen() const { + return !!OutputStream; + } + + inline int DoStreamOpen(IOutputStream* output, bool fileBased = false) { + OutputStream.Reset(output); + SetFileBased(fileBased); + return 0; + } + + inline int DoStreamOpen(const TFile& file) { + int ret; + return (ret = DoFileOpen(file)) ? 
ret : DoStreamOpen(CreateStream(file), true); + } + + virtual IOutputStream* CreateStream(const TFile& file); + + inline bool DoClose() { + if (IsStreamOpen()) { + OutputStream.Destroy(); + return DoFileClose(); + } + return 0; + } + + THolder<IOutputStream> OutputStream; +}; diff --git a/library/cpp/microbdb/hashes.h b/library/cpp/microbdb/hashes.h new file mode 100644 index 0000000000..bfd113c3ba --- /dev/null +++ b/library/cpp/microbdb/hashes.h @@ -0,0 +1,250 @@ +#pragma once + +#include <library/cpp/on_disk/st_hash/static_hash.h> +#include <util/system/sysstat.h> +#include <util/stream/mem.h> +#include <util/string/printf.h> +#include <library/cpp/deprecated/fgood/fgood.h> + +#include "safeopen.h" + +/** This file currently implements creation of mappable read-only hash file. + Basic usage of these "static hashes" is defined in util/static_hash.h (see docs there). + Additional useful wrappers are available in util/static_hash_map.h + + There are two ways to create mappable hash file: + + A) Fill an THashMap/set structure in RAM, then dump it to disk. + This is usually done by save_hash_to_file* functions defined in static_hash.h + (see description in static_hash.h). + + B) Prepare all data using external sorter, then create hash file straight on disk. + This approach is necessary when there isn't enough RAM to hold entire original THashMap. + Implemented in this file as TStaticHashBuilder class. + + Current implementation's major drawback is that the size of the hash must be estimated + before the hash is built (bucketCount), which is not always possible. + Separate implementation with two sort passes is yet to be done. + + Another problem is that maximum stored size of the element (maxRecSize) must also be + known in advance, because we use TDatSorterMemo, etc. 
+ */ + +template <class SizeType> +struct TSthashTmpRec { + SizeType HashVal; + SizeType RecSize; + char Buf[1]; + size_t SizeOf() const { + return &Buf[RecSize] - (char*)this; + } + bool operator<(const TSthashTmpRec& than) const { + return HashVal < than.HashVal; + } + static const ui32 RecordSig = 20100124 + sizeof(SizeType) - 4; +}; + +template <typename T> +struct TReplaceMerger { + T operator()(const T& oldRecord, const T& newRecord) const { + Y_UNUSED(oldRecord); + return newRecord; + } +}; + +/** TStaticHashBuilder template parameters: + HashType - THashMap map/set type for which we construct corresponding mappable hash; + SizeType - type used to store offsets and length in resulting hash; + MergerType - type of object to process records with equal key (see TReplaceMerger for example); + */ + +template <class HashType, class SizeType, class MergerType = TReplaceMerger<typename HashType::mapped_type>> +struct TStaticHashBuilder { + const size_t SrtIOPageSz; + const size_t WrBufSz; + typedef TSthashTmpRec<SizeType> TIoRec; + typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, SizeType> TKeySaver; + typedef typename HashType::value_type TValueType; + typedef typename HashType::mapped_type TMappedType; + typedef typename HashType::key_type TKeyType; + + TDatSorterMemo<TIoRec, TCompareByLess> Srt; + TBuffer IoRec, CurrentBlockRecs; + TKeySaver KeySaver; + typename HashType::hasher Hasher; + typename HashType::key_equal Equals; + MergerType merger; + TString HashFileName; + TString OurTmpDir; + size_t BucketCount; + int FreeBits; + + // memSz is the Sorter buffer size; + // maxRecSize is the maximum size (as reported by size_for_st) of our record(s) + TStaticHashBuilder(size_t memSz, size_t maxRecSize) + : SrtIOPageSz((maxRecSize * 16 + 65535) & ~size_t(65535)) + , WrBufSz(memSz / 16 >= SrtIOPageSz ? 
memSz / 16 : SrtIOPageSz) + , Srt("unused", memSz, SrtIOPageSz, WrBufSz, 0) + , IoRec(sizeof(TIoRec) + maxRecSize) + , CurrentBlockRecs(sizeof(TIoRec) + maxRecSize) + , BucketCount(0) + , FreeBits(0) + { + } + + ~TStaticHashBuilder() { + Close(); + } + + // if tmpDir is supplied, it must exist; + // bucketCount should be HashBucketCount() of the (estimated) element count + void Open(const char* fname, size_t bucketCount, const char* tmpDir = nullptr) { + if (!tmpDir) + tmpDir = ~(OurTmpDir = Sprintf("%s.temp", fname)); + Mkdir(tmpDir, MODE0775); + Srt.Open(tmpDir); + HashFileName = fname; + BucketCount = bucketCount; + int bitCount = 0; + while (((size_t)1 << bitCount) <= BucketCount && bitCount < int(8 * sizeof(size_t))) + ++bitCount; + FreeBits = 8 * sizeof(size_t) - bitCount; + } + + void Push(const TValueType& rec) { + TIoRec* ioRec = MakeIoRec(rec); + Srt.Push(ioRec); + } + TIoRec* MakeIoRec(const TValueType& rec) { + TIoRec* ioRec = (TIoRec*)IoRec.Data(); + size_t mask = (1 << FreeBits) - 1; + size_t hash = Hasher(rec.first); + ioRec->HashVal = ((hash % BucketCount) << FreeBits) + ((hash / BucketCount) & mask); + + TMemoryOutput output(ioRec->Buf, IoRec.Capacity() - offsetof(TIoRec, Buf)); + KeySaver.SaveRecord(&output, rec); + ioRec->RecSize = output.Buf() - ioRec->Buf; + return ioRec; + } + + bool Merge(TVector<std::pair<TKeyType, TMappedType>>& records, size_t newRecordSize) { + TSthashIterator<const TKeyType, const TMappedType, typename HashType::hasher, + typename HashType::key_equal> + newPtr(CurrentBlockRecs.End() - newRecordSize); + for (size_t i = 0; i < records.size(); ++i) { + if (newPtr.KeyEquals(Equals, records[i].first)) { + TMappedType oldValue = records[i].second; + TMappedType newValue = newPtr.Value(); + newValue = merger(oldValue, newValue); + records[i].second = newValue; + return true; + } + } + records.push_back(std::make_pair(newPtr.Key(), newPtr.Value())); + return false; + } + + void PutRecord(const char* buf, size_t rec_size, 
TFILEPtr& f, SizeType& cur_off) { + f.fsput(buf, rec_size); + cur_off += rec_size; + } + + void Finish() { + Srt.Sort(); + // We use variant 1. + // Variant 1: read sorter once, write records, fseeks to write buckets + // (this doesn't allow fname to be stdout) + // Variant 2: read sorter (probably temp. file) twice: write buckets, then write records + // (this allows fname to be stdout but seems to be longer) + TFILEPtr f(HashFileName, "wb"); + setvbuf(f, nullptr, _IOFBF, WrBufSz); + TVector<SizeType> bucketsBuf(WrBufSz, 0); + // prepare header (note: this code must be unified with save_stl.h) + typedef sthashtable_nvm_sv<typename HashType::hasher, typename HashType::key_equal, SizeType> sv_type; + sv_type sv = {Hasher, Equals, BucketCount, 0, 0}; + // to do: m.b. use just the size of corresponding object? + SizeType cur_off = sizeof(sv_type) + + (sv.num_buckets + 1) * sizeof(SizeType); + SizeType bkt_wroff = sizeof(sv_type), bkt_bufpos = 0, prev_bkt = 0, prev_hash = (SizeType)-1; + bucketsBuf[bkt_bufpos++] = cur_off; + // if might me better to write many zeroes here + f.seek(cur_off, SEEK_SET); + TVector<std::pair<TKeyType, TMappedType>> currentBlock; + bool emptyFile = true; + size_t prevRecSize = 0; + // seek forward + while (true) { + const TIoRec* rec = Srt.Next(); + if (currentBlock.empty() && !emptyFile) { + if (rec && prev_hash == rec->HashVal) { + Merge(currentBlock, prevRecSize); + } else { + // if there is only one record with this hash, don't recode it, just write + PutRecord(CurrentBlockRecs.Data(), prevRecSize, f, cur_off); + sv.num_elements++; + } + } + if (!rec || prev_hash != rec->HashVal) { + // write buckets table + for (size_t i = 0; i < currentBlock.size(); ++i) { + TIoRec* ioRec = MakeIoRec(TValueType(currentBlock[i])); + PutRecord(ioRec->Buf, ioRec->RecSize, f, cur_off); + } + sv.num_elements += currentBlock.size(); + currentBlock.clear(); + CurrentBlockRecs.Clear(); + if (rec) { + prev_hash = rec->HashVal; + } + } + // note: prev_bkt's 
semantics here is 'cur_bkt - 1', thus we are actually cycling + // until cur_bkt == rec->HashVal *inclusively* + while (!rec || prev_bkt != (rec->HashVal >> FreeBits)) { + bucketsBuf[bkt_bufpos++] = cur_off; + if (bkt_bufpos == bucketsBuf.size()) { + f.seek(bkt_wroff, SEEK_SET); + size_t sz = bkt_bufpos * sizeof(bucketsBuf[0]); + if (f.write(bucketsBuf.begin(), 1, sz) != sz) + throw yexception() << "could not write " << sz << " bytes to " << ~HashFileName; + bkt_wroff += sz; + bkt_bufpos = 0; + f.seek(cur_off, SEEK_SET); + } + prev_bkt++; + if (!rec) { + break; + } + assert(prev_bkt < BucketCount); + } + if (!rec) { + break; + } + emptyFile = false; + CurrentBlockRecs.Append(rec->Buf, rec->RecSize); + if (!currentBlock.empty()) { + Merge(currentBlock, rec->RecSize); + } else { + prevRecSize = rec->RecSize; + } + } + // finish buckets table + f.seek(bkt_wroff, SEEK_SET); + size_t sz = bkt_bufpos * sizeof(bucketsBuf[0]); + if (sz && f.write(bucketsBuf.begin(), 1, sz) != sz) + throw yexception() << "could not write " << sz << " bytes to " << ~HashFileName; + bkt_wroff += sz; + for (; prev_bkt < BucketCount; prev_bkt++) + f.fput(cur_off); + // finally write header + sv.data_end_off = cur_off; + f.seek(0, SEEK_SET); + f.fput(sv); + f.close(); + } + + void Close() { + Srt.Close(); + if (+OurTmpDir) + rmdir(~OurTmpDir); + } +}; diff --git a/library/cpp/microbdb/header.cpp b/library/cpp/microbdb/header.cpp new file mode 100644 index 0000000000..f4511d6fb6 --- /dev/null +++ b/library/cpp/microbdb/header.cpp @@ -0,0 +1,91 @@ +#include "header.h" + +#include <util/stream/output.h> +#include <util/stream/format.h> + +TString ToString(EMbdbErrors error) { + TString ret; + switch (error) { + case MBDB_ALREADY_INITIALIZED: + ret = "already initialized"; + break; + case MBDB_NOT_INITIALIZED: + ret = "not initialized"; + break; + case MBDB_BAD_DESCRIPTOR: + ret = "bad descriptor"; + break; + case MBDB_OPEN_ERROR: + ret = "open error"; + break; + case MBDB_READ_ERROR: + ret = "read 
error"; + break; + case MBDB_WRITE_ERROR: + ret = "write error"; + break; + case MBDB_CLOSE_ERROR: + ret = "close error"; + break; + case MBDB_EXPECTED_EOF: + ret = "expected eof"; + break; + case MBDB_UNEXPECTED_EOF: + ret = "unxepected eof"; + break; + case MBDB_BAD_FILENAME: + ret = "bad filename"; + break; + case MBDB_BAD_METAPAGE: + ret = "bad metapage"; + break; + case MBDB_BAD_RECORDSIG: + ret = "bad recordsig"; + break; + case MBDB_BAD_FILE_SIZE: + ret = "bad file size"; + break; + case MBDB_BAD_PAGESIG: + ret = "bad pagesig"; + break; + case MBDB_BAD_PAGESIZE: + ret = "bad pagesize"; + break; + case MBDB_BAD_PARM: + ret = "bad parm"; + break; + case MBDB_BAD_SYNC: + ret = "bad sync"; + break; + case MBDB_PAGE_OVERFLOW: + ret = "page overflow"; + break; + case MBDB_NO_MEMORY: + ret = "no memory"; + break; + case MBDB_MEMORY_LEAK: + ret = "memory leak"; + break; + case MBDB_NOT_SUPPORTED: + ret = "not supported"; + break; + default: + ret = "unknown"; + break; + } + return ret; +} + +TString ErrorMessage(int error, const TString& text, const TString& path, ui32 recordSig, ui32 gotRecordSig) { + TStringStream str; + str << text; + if (path.size()) + str << " '" << path << "'"; + str << ": " << ToString(static_cast<EMbdbErrors>(error)); + if (recordSig && (!gotRecordSig || recordSig != gotRecordSig)) + str << ". Expected RecordSig: " << Hex(recordSig, HF_ADDX); + if (recordSig && gotRecordSig && recordSig != gotRecordSig) + str << ", got: " << Hex(gotRecordSig, HF_ADDX); + str << ". 
Last system error text: " << LastSystemErrorText(); + return str.Str(); +} diff --git a/library/cpp/microbdb/header.h b/library/cpp/microbdb/header.h new file mode 100644 index 0000000000..0951d610ea --- /dev/null +++ b/library/cpp/microbdb/header.h @@ -0,0 +1,159 @@ +#pragma once + +#include <util/system/defaults.h> +#include <util/generic/typetraits.h> +#include <util/generic/string.h> +#include <util/str_stl.h> + +#include <stdio.h> + +#define METASIZE (1u << 12) +#define METASIG 0x12345678u +#define PAGESIG 0x87654321u + +enum EMbdbErrors { + MBDB_ALREADY_INITIALIZED = 200, + MBDB_NOT_INITIALIZED = 201, + MBDB_BAD_DESCRIPTOR = 202, + MBDB_OPEN_ERROR = 203, + MBDB_READ_ERROR = 204, + MBDB_WRITE_ERROR = 205, + MBDB_CLOSE_ERROR = 206, + MBDB_EXPECTED_EOF = 207, + MBDB_UNEXPECTED_EOF = 208, + MBDB_BAD_FILENAME = 209, + MBDB_BAD_METAPAGE = 210, + MBDB_BAD_RECORDSIG = 211, + MBDB_BAD_FILE_SIZE = 212, + MBDB_BAD_PAGESIG = 213, + MBDB_BAD_PAGESIZE = 214, + MBDB_BAD_PARM = 215, + MBDB_BAD_SYNC = 216, + MBDB_PAGE_OVERFLOW = 217, + MBDB_NO_MEMORY = 218, + MBDB_MEMORY_LEAK = 219, + MBDB_NOT_SUPPORTED = 220 +}; + +TString ToString(EMbdbErrors error); +TString ErrorMessage(int error, const TString& text, const TString& path = TString(), ui32 recordSig = 0, ui32 gotRecordSig = 0); + +enum EPageFormat { + MBDB_FORMAT_RAW = 0, + MBDB_FORMAT_COMPRESSED = 1, + MBDB_FORMAT_NULL = 255 +}; + +enum ECompressionAlgorithm { + MBDB_COMPRESSION_ZLIB = 1, + MBDB_COMPRESSION_FASTLZ = 2, + MBDB_COMPRESSION_SNAPPY = 3 +}; + +struct TDatMetaPage { + ui32 MetaSig; + ui32 RecordSig; + ui32 PageSize; +}; + +struct TDatPage { + ui32 RecNum; //!< number of records on this page + ui32 PageSig; + ui32 Format : 2; //!< one of EPageFormat + ui32 Reserved : 30; +}; + +/// Additional page header with compression info +struct TCompressedPage { + ui32 BlockCount; + ui32 Algorithm : 4; + ui32 Version : 4; + ui32 Reserved : 24; +}; + +namespace NMicroBDB { + /// Header of compressed block + struct 
TCompressedHeader { + ui32 Compressed; + ui32 Original; /// original size of block + ui32 Count; /// number of records in block + ui32 Reserved; + }; + + Y_HAS_MEMBER(AssertValid); + + template <typename T, bool TVal> + struct TAssertValid { + void operator()(const T*) { + } + }; + + template <typename T> + struct TAssertValid<T, true> { + void operator()(const T* rec) { + return rec->AssertValid(); + } + }; + + template <typename T> + void AssertValid(const T* rec) { + return NMicroBDB::TAssertValid<T, NMicroBDB::THasAssertValid<T>::value>()(rec); + } + + Y_HAS_MEMBER(SizeOf); + + template <typename T, bool TVal> + struct TGetSizeOf; + + template <typename T> + struct TGetSizeOf<T, true> { + size_t operator()(const T* rec) { + return rec->SizeOf(); + } + }; + + template <typename T> + struct TGetSizeOf<T, false> { + size_t operator()(const T*) { + return sizeof(T); + } + }; + + inline char* GetFirstRecord(const TDatPage* page) { + switch (page->Format) { + case MBDB_FORMAT_RAW: + return (char*)page + sizeof(TDatPage); + case MBDB_FORMAT_COMPRESSED: + // Первая запись на сжатой странице сохраняется несжатой + // сразу же после всех заголовков. 
+ // Алгоритм сохранения смотреть в TOutputRecordIterator::FlushBuffer + return (char*)page + sizeof(TDatPage) + sizeof(TCompressedPage) + sizeof(NMicroBDB::TCompressedHeader); + } + return (char*)nullptr; + } +} + +template <typename T> +size_t SizeOf(const T* rec) { + return NMicroBDB::TGetSizeOf<T, NMicroBDB::THasSizeOf<T>::value>()(rec); +} + +template <typename T> +size_t MaxSizeOf() { + return sizeof(T); +} + +static inline int DatNameToIdx(char iname[/*FILENAME_MAX*/], const char* dname) { + if (!dname || !*dname) + return MBDB_BAD_FILENAME; + const char* ptr; + if (!(ptr = strrchr(dname, '/'))) + ptr = dname; + if (!(ptr = strrchr(ptr, '.'))) + ptr = strchr(dname, 0); + if (ptr - dname > FILENAME_MAX - 5) + return MBDB_BAD_FILENAME; + memcpy(iname, dname, ptr - dname); + strcpy(iname + (ptr - dname), ".idx"); + return 0; +} diff --git a/library/cpp/microbdb/heap.h b/library/cpp/microbdb/heap.h new file mode 100644 index 0000000000..ef5a53534c --- /dev/null +++ b/library/cpp/microbdb/heap.h @@ -0,0 +1,143 @@ +#pragma once + +#include "header.h" +#include "extinfo.h" + +#include <util/generic/vector.h> + +#include <errno.h> + +/////////////////////////////////////////////////////////////////////////////// + +/// Default comparator +template <class TVal> +struct TCompareByLess { + inline bool operator()(const TVal* a, const TVal* b) const { + return TLess<TVal>()(*a, *b); + } +}; + +/////////////////////////////////////////////////////////////////////////////// + +template <class TVal, class TIterator, class TCompare = TCompareByLess<TVal>> +class THeapIter { +public: + int Init(TIterator** iters, int count) { + Term(); + if (!count) + return 0; + if (!(Heap = (TIterator**)malloc(count * sizeof(TIterator*)))) + return ENOMEM; + + Count = count; + count = 0; + while (count < Count) + if (count && !(*iters)->Next()) { //here first TIterator is NOT initialized! 
+ Count--; + iters++; + } else { + Heap[count++] = *iters++; + } + count = Count / 2; + while (--count > 0) //Heap[0] is not changed! + Sift(count, Count); //do not try to replace this code by make_heap + return 0; + } + + int Init(TIterator* iters, int count) { + TVector<TIterator*> a(count); + for (int i = 0; i < count; ++i) + a[i] = &iters[i]; + return Init(&a[0], count); + } + + THeapIter() + : Heap(nullptr) + , Count(0) + { + } + + THeapIter(TIterator* a, TIterator* b) + : Heap(nullptr) + , Count(0) + { + TIterator* arr[] = {a, b}; + if (Init(arr, 2)) + ythrow yexception() << "can't Init THeapIter"; + } + + THeapIter(TVector<TIterator>& v) + : Heap(nullptr) + , Count(0) + { + if (Init(&v[0], v.size())) { + ythrow yexception() << "can't Init THeapIter"; + } + } + + ~THeapIter() { + Term(); + } + + inline const TVal* Current() const { + if (!Count) + return nullptr; + return (*Heap)->Current(); + } + + inline const TIterator* CurrentIter() const { + return *Heap; + } + + //for ends of last file will use Heap[0] = Heap[0] ! 
and + //returns Current of eof so Current of eof MUST return NULL + //possible this is bug and need fixing + const TVal* Next() { + if (!Count) + return nullptr; + if (!(*Heap)->Next()) //on first call unitialized first TIterator + *Heap = Heap[--Count]; //will be correctly initialized + + if (Count == 2) { + if (TCompare()(Heap[1]->Current(), Heap[0]->Current())) + DoSwap(Heap[1], Heap[0]); + } else + Sift(0, Count); + + return Current(); + } + + inline bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const { + return (*Heap)->GetExtInfo(extInfo); + } + + inline const ui8* GetExtInfoRaw(size_t* len) const { + return (*Heap)->GetExtInfoRaw(len); + } + + void Term() { + ::free(Heap); + Heap = nullptr; + Count = 0; + } + +protected: + void Sift(int node, int end) { + TIterator* x = Heap[node]; + int son; + for (son = 2 * node + 1; son < end; node = son, son = 2 * node + 1) { + if (son < (end - 1) && TCompare()(Heap[son + 1]->Current(), Heap[son]->Current())) + son++; + if (TCompare()(Heap[son]->Current(), x->Current())) + Heap[node] = Heap[son]; + else + break; + } + Heap[node] = x; + } + + TIterator** Heap; + int Count; +}; + +/////////////////////////////////////////////////////////////////////////////// diff --git a/library/cpp/microbdb/input.h b/library/cpp/microbdb/input.h new file mode 100644 index 0000000000..a214ba6e8a --- /dev/null +++ b/library/cpp/microbdb/input.h @@ -0,0 +1,1027 @@ +#pragma once + +#include "header.h" +#include "file.h" +#include "reader.h" + +#include <util/system/maxlen.h> +#include <util/system/event.h> +#include <util/system/thread.h> + +#include <thread> + +#include <sys/uio.h> + +#include <errno.h> + +template <class TFileManip> +inline ssize_t Readv(TFileManip& fileManip, const struct iovec* iov, int iovcnt) { + ssize_t read_count = 0; + for (int n = 0; n < iovcnt; n++) { + ssize_t last_read = fileManip.Read(iov[n].iov_base, iov[n].iov_len); + if (last_read < 0) + return -1; + read_count += last_read; + } + return 
read_count; +} + +template <class TVal, typename TBasePageIter> +class TInputRecordIterator: public TBasePageIter { + typedef THolder<NMicroBDB::IBasePageReader<TVal>> TReaderHolder; + +public: + typedef TBasePageIter TPageIter; + + TInputRecordIterator() { + Init(); + } + + ~TInputRecordIterator() { + Term(); + } + + const TVal* Current() const { + return Rec; + } + + bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const { + if (!Rec) + return false; + return Reader->GetExtInfo(extInfo); + } + + const ui8* GetExtInfoRaw(size_t* len) const { + if (!Rec) + return nullptr; + return Reader->GetExtInfoRaw(len); + } + + size_t GetRecSize() const { + return Reader->GetRecSize(); + } + + size_t GetExtSize() const { + return Reader->GetExtSize(); + } + + const TVal* Next() { + if (RecNum) + --RecNum; + else { + TDatPage* page = TPageIter::Next(); + if (!page) { + if (TPageIter::IsFrozen() && Reader.Get()) + Reader->SetClearFlag(); + return Rec = nullptr; + } else if (!!SelectReader()) + return Rec = nullptr; + RecNum = TPageIter::Current()->RecNum - 1; + } + return Rec = Reader->Next(); + } + + // Skip(0) == Current(); Skip(1) == Next() + const TVal* Skip(int& num) { + // Y_ASSERT(num >= 0); ? 
otherwise it gets into infinite loop + while (num > RecNum) { + num -= RecNum + 1; + if (!TPageIter::Next() || !!SelectReader()) { + RecNum = 0; + return Rec = nullptr; + } + RecNum = TPageIter::Current()->RecNum - 1; + Rec = Reader->Next(); + } + ++num; + while (--num) + Next(); + return Rec; + } + + // begin reading from next page + void Reset() { + Rec = NULL; + RecNum = 0; + if (Reader.Get()) + Reader->Reset(); + } + +protected: + int Init() { + Rec = nullptr; + RecNum = 0; + Format = MBDB_FORMAT_NULL; + return 0; + } + + int Term() { + Reader.Reset(nullptr); + Format = MBDB_FORMAT_NULL; + Rec = nullptr; + RecNum = 0; + return 0; + } + + const TVal* GotoPage(int pageno) { + if (!TPageIter::GotoPage(pageno) || !!SelectReader()) + return Rec = nullptr; + RecNum = TPageIter::Current()->RecNum - 1; + return Rec = Reader->Next(); + } + + int SelectReader() { + if (!TPageIter::Current()) + return MBDB_UNEXPECTED_EOF; + if (ui32(Format) != TPageIter::Current()->Format) { + switch (TPageIter::Current()->Format) { + case MBDB_FORMAT_RAW: + Reader.Reset(new NMicroBDB::TRawPageReader<TVal, TPageIter>(this)); + break; + case MBDB_FORMAT_COMPRESSED: + Reader.Reset(new NMicroBDB::TCompressedReader<TVal, TPageIter>(this)); + break; + default: + return MBDB_NOT_SUPPORTED; + } + Format = EPageFormat(TPageIter::Current()->Format); + } else { + Y_ASSERT(Reader.Get() != nullptr); + Reader->Reset(); + } + return 0; + } + + const TVal* Rec; + TReaderHolder Reader; + int RecNum; //!< number of records on the current page after the current record + EPageFormat Format; +}; + +template <class TBaseReader> +class TInputPageIterator: public TBaseReader { +public: + typedef TBaseReader TReader; + + TInputPageIterator() + : Buf(nullptr) + { + Term(); + } + + ~TInputPageIterator() { + Term(); + } + + TDatPage* Current() { + return CurPage; + } + + int Freeze() { + return (Frozen = (PageNum == -1) ? 
0 : PageNum); + } + + void Unfreeze() { + Frozen = -1; + } + + inline int IsFrozen() const { + return Frozen + 1; + } + + inline size_t GetPageSize() const { + return TReader::GetPageSize(); + } + + inline int GetPageNum() const { + return PageNum; + } + + inline int IsEof() const { + return Eof; + } + + TDatPage* Next() { + if (PageNum >= Maxpage && ReadBuf()) { + Eof = Eof ? Eof : TReader::IsEof(); + return CurPage = nullptr; + } + return CurPage = (TDatPage*)(Buf + ((++PageNum) % Bufpages) * GetPageSize()); + } + + TDatPage* GotoPage(int pageno) { + if (pageno <= Maxpage && pageno >= (Maxpage - Pages + 1)) { + PageNum = pageno; + return CurPage = (TDatPage*)(Buf + (PageNum % Bufpages) * GetPageSize()); + } + if (IsFrozen() || TReader::GotoPage(pageno)) + return nullptr; + Maxpage = PageNum = pageno - 1; + Eof = 0; + return Next(); + } + +protected: + int Init(size_t pages, int pagesOrBytes) { + Term(); + if (pagesOrBytes == -1) + Bufpages = TReader::GetLastPage(); + else if (pagesOrBytes) + Bufpages = pages; + else + Bufpages = pages / GetPageSize(); + if (!TReader::GetLastPage()) { + Bufpages = 0; + assert(Eof == 1); + return 0; + } + int lastPage = TReader::GetLastPage(); + if (lastPage >= 0) + Bufpages = (int)Min(lastPage, Bufpages); + Bufpages = Max(2, Bufpages); + Eof = 0; + ABuf.Alloc(Bufpages * GetPageSize()); + return (Buf = ABuf.Begin()) ? 0 : ENOMEM; + // return (Buf = (char*)malloc(Bufpages * GetPageSize())) ? 0 : ENOMEM; + } + + int Term() { + // free(Buf); + ABuf.Dealloc(); + Buf = nullptr; + Maxpage = PageNum = Frozen = -1; + Bufpages = 0; + Pages = 0; + Eof = 1; + CurPage = nullptr; + return 0; + } + + int ReadBuf() { + int nvec; + iovec vec[2]; + int maxpage = (Frozen == -1 ? 
Maxpage + 1 : Frozen) + Bufpages - 1; + int minpage = Maxpage + 1; + if (maxpage < minpage) + return EAGAIN; + minpage %= Bufpages; + maxpage %= Bufpages; + if (maxpage < minpage) { + vec[0].iov_base = Buf + GetPageSize() * minpage; + vec[0].iov_len = GetPageSize() * (Bufpages - minpage); + vec[1].iov_base = Buf; + vec[1].iov_len = GetPageSize() * (maxpage + 1); + nvec = 2; + } else { + vec[0].iov_base = Buf + GetPageSize() * minpage; + vec[0].iov_len = GetPageSize() * (maxpage - minpage + 1); + nvec = 1; + } + TReader::ReadPages(vec, nvec, &Pages); + Maxpage += Pages; + return !Pages; + } + + int Maxpage, PageNum, Frozen, Bufpages, Eof, Pages; + TDatPage* CurPage; + // TMappedArray<char> ABuf; + TMappedAllocation ABuf; + char* Buf; +}; + +template <class TBaseReader> +class TInputPageIteratorMT: public TBaseReader { +public: + typedef TBaseReader TReader; + + TInputPageIteratorMT() + : CurBuf(0) + , CurReadBuf(0) + , Buf(nullptr) + { + Term(); + } + + ~TInputPageIteratorMT() { + Term(); + } + + TDatPage* Current() { + return CurPage; + } + + int Freeze() { + return (Frozen = (PageNum == -1) ? 0 : PageNum); + } + + void Unfreeze() { + Frozen = -1; + } + + inline int IsFrozen() const { + return Frozen + 1; + } + + inline size_t GetPageSize() const { + return TReader::GetPageSize(); + } + + inline int GetPageNum() const { + return PageNum; + } + + inline int IsEof() const { + return Eof; + } + + TDatPage* Next() { + if (Eof) + return CurPage = nullptr; + if (PageNum >= Maxpage && ReadBuf()) { + Eof = Eof ? 
Eof : TReader::IsEof(); + return CurPage = nullptr; + } + return CurPage = (TDatPage*)(Buf + ((++PageNum) % Bufpages) * GetPageSize()); + } + + TDatPage* GotoPage(int pageno) { + if (pageno <= Maxpage && pageno >= (Maxpage - Pages + 1)) { + PageNum = pageno; + return CurPage = (TDatPage*)(Buf + (PageNum % Bufpages) * GetPageSize()); + } + if (IsFrozen() || TReader::GotoPage(pageno)) + return nullptr; + Maxpage = PageNum = pageno - 1; + Eof = 0; + return Next(); + } + + void ReadPages() { + // fprintf(stderr, "ReadPages started\n"); + bool eof = false; + while (!eof) { + QEvent[CurBuf].Wait(); + if (Finish) + return; + int pages = ReadCurBuf(Bufs[CurBuf]); + PagesM[CurBuf] = pages; + eof = !pages; + AEvent[CurBuf].Signal(); + CurBuf ^= 1; + } + } + +protected: + int Init(size_t pages, int pagesOrBytes) { + Term(); + if (pagesOrBytes == -1) + Bufpages = TReader::GetLastPage(); + else if (pagesOrBytes) + Bufpages = pages; + else + Bufpages = pages / GetPageSize(); + if (!TReader::GetLastPage()) { + Bufpages = 0; + assert(Eof == 1); + return 0; + } + int lastPage = TReader::GetLastPage(); + if (lastPage >= 0) + Bufpages = (int)Min(lastPage, Bufpages); + Bufpages = Max(2, Bufpages); + Eof = 0; + ABuf.Alloc(Bufpages * GetPageSize() * 2); + Bufs[0] = ABuf.Begin(); + Bufs[1] = Bufs[0] + Bufpages * GetPageSize(); + // return (Buf = (char*)malloc(Bufpages * GetPageSize())) ? 0 : ENOMEM; + Finish = false; + ReadThread = std::thread([this]() { + TThread::SetCurrentThreadName("DatReader"); + ReadPages(); + }); + QEvent[0].Signal(); + return Bufs[0] ? 
0 : ENOMEM;
+    }
+
+    // Asks the reader thread to finish and waits for it: sets Finish, signals both
+    // queue events and joins ReadThread.
+    void StopThread() {
+        Finish = true;
+        QEvent[0].Signal();
+        QEvent[1].Signal();
+        ReadThread.join();
+    }
+
+    // Stops the reader thread (if it is running), releases the buffer and resets
+    // the iterator state to "not initialized / at EOF". Always returns 0.
+    int Term() {
+        // free(Buf);
+        if (ReadThread.joinable())
+            StopThread();
+        ABuf.Dealloc();
+        Buf = nullptr;
+        Bufs[0] = nullptr;
+        Bufs[1] = nullptr;
+        Maxpage = MaxpageR = PageNum = Frozen = -1;
+        Bufpages = 0;
+        Pages = 0;
+        Eof = 1;
+        CurPage = nullptr;
+        return 0;
+    }
+
+    // Fills the free part of the ring buffer `buf` (Bufpages pages) from the reader.
+    // Returns EAGAIN when there is no free page, otherwise the number of pages read.
+    // NOTE(review): the status returned by TReader::ReadPages is not checked here.
+    int ReadCurBuf(char* buf) {
+        int nvec;
+        iovec vec[2];
+        int maxpage = (Frozen == -1 ? MaxpageR + 1 : Frozen) + Bufpages - 1;
+        int minpage = MaxpageR + 1;
+        if (maxpage < minpage)
+            return EAGAIN;
+        minpage %= Bufpages;
+        maxpage %= Bufpages;
+        if (maxpage < minpage) {
+            // The free region wraps around the end of the ring: use two vectors.
+            vec[0].iov_base = buf + GetPageSize() * minpage;
+            vec[0].iov_len = GetPageSize() * (Bufpages - minpage);
+            vec[1].iov_base = buf;
+            vec[1].iov_len = GetPageSize() * (maxpage + 1);
+            nvec = 2;
+        } else {
+            vec[0].iov_base = buf + GetPageSize() * minpage;
+            vec[0].iov_len = GetPageSize() * (maxpage - minpage + 1);
+            nvec = 1;
+        }
+        int pages;
+        TReader::ReadPages(vec, nvec, &pages);
+        MaxpageR += pages;
+        return pages;
+    }
+
+    // Signals QEvent for the other buffer, waits on AEvent for the current one, then
+    // makes the current buffer active and flips CurReadBuf.
+    // Returns non-zero when the newly active buffer contains no pages.
+    int ReadBuf() {
+        QEvent[CurReadBuf ^ 1].Signal();
+        AEvent[CurReadBuf].Wait();
+        Buf = Bufs[CurReadBuf];
+        Maxpage += (Pages = PagesM[CurReadBuf]);
+        CurReadBuf ^= 1;
+        return !Pages;
+    }
+
+    int Maxpage, MaxpageR, PageNum, Frozen, Bufpages, Eof, Pages;
+    TDatPage* CurPage;
+    // TMappedArray<char> ABuf;
+    ui32 CurBuf;
+    ui32 CurReadBuf;
+    TMappedAllocation ABuf;
+    char* Buf;
+    char* Bufs[2];
+    ui32 PagesM[2];
+    TAutoEvent QEvent[2];
+    TAutoEvent AEvent[2];
+    std::thread ReadThread;
+    bool Finish;
+};
+
+// Page-level reader of a dat-file: validates the metapage and reads whole pages
+// through TFileManip, checking the signature of every page read.
+template <typename TFileManip>
+class TInputPageFileImpl: private TNonCopyable {
+protected:
+    TFileManip FileManip;
+
+public:
+    TInputPageFileImpl()
+        : Pagesize(0)
+        , Fd(-1)
+        , Eof(1)
+        , Error(0)
+        , Pagenum(0)
+        , Recordsig(0)
+    {
+        Term();
+    }
+
+    ~TInputPageFileImpl() {
+        Term();
+    }
+
+    inline int IsEof() const {
+        return Eof;
+    }
+
+    inline int GetError() const {
+        return Error;
+    }
+
+    inline size_t GetPageSize() const {
+        return Pagesize;
+    }
+
+    // Number of pages computed from the file length, or -1 when the length is unknown.
+    inline int GetLastPage() const {
+        return Pagenum;
+    }
+
+    inline ui32 GetRecordSig() const {
+        return Recordsig;
+    }
+
+    inline bool IsOpen() const {
+        return FileManip.IsOpen();
+    }
+
+protected:
+    // Opens `fname` and validates its metapage against `recsig`.
+    // Returns 0 on success or an error code, which is also stored in Error.
+    int Init(const char* fname, ui32 recsig, ui32* gotrecsig = nullptr, bool direct = false) {
+        Error = FileManip.Open(fname, direct);
+        return Error ? Error : Init(TFile(), recsig, gotrecsig);
+    }
+
+    // Attaches `file` (unless FileManip is already open), reads the metapage and validates it.
+    // If `gotrecsig` is given it receives the record signature found in the file.
+    int Init(const TFile& file, ui32 recsig, ui32* gotrecsig = nullptr) {
+        if (!file.IsOpen() && !FileManip.IsOpen())
+            return MBDB_NOT_INITIALIZED;
+        if (file.IsOpen() && FileManip.IsOpen())
+            return MBDB_ALREADY_INITIALIZED;
+        if (file.IsOpen()) {
+            Error = FileManip.Init(file);
+            if (Error)
+                return Error;
+        }
+
+        // TArrayHolder<ui8> buf(new ui8[METASIZE + FS_BLOCK_SIZE]);
+        // ui8* ptr = (buf.Get() + FS_BLOCK_SIZE - ((ui64)buf.Get() & (FS_BLOCK_SIZE - 1)));
+        TMappedArray<ui8> buf;
+        buf.Create(METASIZE);
+        ui8* ptr = &buf[0];
+        TDatMetaPage* meta = (TDatMetaPage*)ptr;
+        ssize_t size = METASIZE;
+        ssize_t ret;
+        // Read until the whole metapage is in memory; a short file leaves size != 0.
+        while (size && (ret = FileManip.Read(ptr, (unsigned)size)) > 0) {
+            Y_ASSERT(ret <= size);
+            size -= ret;
+            ptr += ret;
+        }
+        if (size) {
+            FileManip.Close();
+            return Error = MBDB_BAD_METAPAGE;
+        }
+        if (gotrecsig)
+            *gotrecsig = meta->RecordSig;
+        return Init(TFile(), meta, recsig);
+    }
+
+    // Same as above, but the data comes from an input stream.
+    int Init(TAutoPtr<IInputStream> input, ui32 recsig, ui32* gotrecsig = nullptr) {
+        if (!input && !FileManip.IsOpen())
+            return MBDB_NOT_INITIALIZED;
+        if (FileManip.IsOpen())
+            return MBDB_ALREADY_INITIALIZED;
+
+        Error = FileManip.Open(input);
+        if (Error)
+            return Error;
+
+        TArrayHolder<ui8> buf(new ui8[METASIZE]);
+        ui8* ptr = buf.Get();
+        ssize_t size = METASIZE;
+        ssize_t ret;
+        while (size && (ret = FileManip.Read(ptr, (unsigned)size)) > 0) {
+            Y_ASSERT(ret <= size);
+            size -= ret;
+            ptr += ret;
+        }
+        if (size) {
+            FileManip.Close();
+            return Error = MBDB_BAD_METAPAGE;
+        }
+        TDatMetaPage* meta = (TDatMetaPage*)buf.Get();
+        if (gotrecsig)
+            *gotrecsig = meta->RecordSig;
+        return Init(TFile(), meta, recsig);
+    }
+
+    // Validates an already loaded metapage and sets up page size, record signature
+    // and page count. Returns 0 on success or an MBDB_* error code (stored in Error).
+    int Init(const TFile& file, const TDatMetaPage* meta, ui32 recsig) {
+        if (!file.IsOpen() && !FileManip.IsOpen())
+            return MBDB_NOT_INITIALIZED;
+        if (file.IsOpen() && FileManip.IsOpen())
+            return MBDB_ALREADY_INITIALIZED;
+        if (file.IsOpen()) {
+            Error = FileManip.Init(file);
+            if (Error)
+                return Error;
+        }
+
+        // A zero page size is rejected together with a bad signature: it would
+        // otherwise be used as a divisor below and in ReadPages().
+        if (meta->MetaSig != METASIG || meta->PageSize == 0)
+            Error = MBDB_BAD_METAPAGE;
+        else if (meta->RecordSig != recsig)
+            Error = MBDB_BAD_RECORDSIG;
+
+        if (Error) {
+            FileManip.Close();
+            return Error;
+        }
+
+        i64 flength = FileManip.GetLength();
+        if (flength >= 0) {
+            i64 fsize = flength;
+            fsize -= METASIZE;
+            if (fsize % meta->PageSize)
+                return Error = MBDB_BAD_FILE_SIZE;
+            Pagenum = (int)(fsize / meta->PageSize);
+        } else {
+            // Length is not available: the page count is unknown.
+            Pagenum = -1;
+        }
+        Pagesize = meta->PageSize;
+        Recordsig = meta->RecordSig;
+        Error = Eof = 0;
+        return Error;
+    }
+
+    // Reads as many whole pages as fit into the `nvec` buffers of `vec`.
+    // *pages receives the number of pages read; Eof is set when the buffers could
+    // not be filled completely. Returns 0 or an error code (stored in Error).
+    int ReadPages(iovec* vec, int nvec, int* pages) {
+        *pages = 0;
+
+        if (Eof || Error)
+            return Error;
+
+        ssize_t size = 0, delta = 0, total = 0;
+        iovec* pvec = vec;
+        int vsize = nvec;
+
+        while (vsize && (size = Readv(FileManip, pvec, (int)Min(vsize, 16))) > 0) {
+            total += size;
+            if (delta) {
+                // Undo the temporary shift made for the previous partial read.
+                size += delta;
+                pvec->iov_len += delta;
+                pvec->iov_base = (char*)pvec->iov_base - delta;
+                delta = 0;
+            }
+            while (size) {
+                if ((size_t)size >= pvec->iov_len) {
+                    size -= pvec->iov_len;
+                    ++pvec;
+                    --vsize;
+                } else {
+                    // Partially filled vector: shift it so the next read continues after the data.
+                    delta = size;
+                    pvec->iov_len -= size;
+                    pvec->iov_base = (char*)pvec->iov_base + size;
+                    size = 0;
+                }
+            }
+        }
+        if (delta) {
+            pvec->iov_len += delta;
+            pvec->iov_base = (char*)pvec->iov_base - delta;
+        }
+        if (size < 0)
+            return Error = errno ? errno : MBDB_READ_ERROR;
+        if (total % Pagesize)
+            return Error = MBDB_BAD_FILE_SIZE;
+        if (vsize)
+            Eof = 1;
+        *pages = total / Pagesize; // it would be better to assign it after the for-loops
+        // Verify the signature of every page that was read.
+        for (; total; ++vec, total -= size)
+            for (size = 0; size < total && (size_t)size < vec->iov_len; size += Pagesize)
+                if (((TDatPage*)((char*)vec->iov_base + size))->PageSig != PAGESIG)
+                    return Error = MBDB_BAD_PAGESIG;
+        return Error;
+    }
+
+    // Positions the file at the start of page `page` and clears Eof.
+    int GotoPage(int page) {
+        if (Error)
+            return Error;
+        Eof = 0;
+        i64 offset = (i64)page * Pagesize + METASIZE;
+        if (offset != FileManip.Seek(offset, SEEK_SET))
+            Error = MBDB_BAD_FILE_SIZE;
+        return Error;
+    }
+
+    int Term() {
+        return FileManip.Close();
+    }
+
+    size_t Pagesize;
+    int Fd;
+    int Eof;
+    int Error;
+    int Pagenum; //!< number of pages in this file
+    ui32 Recordsig;
+};
+
+// Page iterator over a reader that exposes the whole file through GetData()/GetSize().
+template <class TBaseReader>
+class TMappedInputPageIterator: public TBaseReader {
+public:
+    typedef TBaseReader TReader;
+
+    TMappedInputPageIterator() {
+        Term();
+    }
+
+    ~TMappedInputPageIterator() {
+        Term();
+    }
+
+    TDatPage* Current() {
+        return CurPage;
+    }
+
+    inline size_t GetPageSize() const {
+        return TReader::GetPageSize();
+    }
+
+    inline int GetPageNum() const {
+        return PageNum;
+    }
+
+    inline int IsEof() const {
+        return Eof;
+    }
+
+    inline int IsFrozen() const {
+        return 0;
+    }
+
+    // Advances to the next page; returns nullptr and sets Eof when its offset is outside the data.
+    TDatPage* Next() {
+        i64 pos = (i64)(++PageNum) * GetPageSize() + METASIZE;
+        if (pos < 0 || pos >= (i64)TReader::GetSize()) {
+            Eof = 1;
+            return CurPage = nullptr;
+        }
+        return CurPage = (TDatPage*)((char*)TReader::GetData() + pos);
+    }
+
+protected:
+    int Init(size_t /*pages*/, int /*pagesOrBytes*/) {
+        Term();
+        Eof = 0;
+        return 0;
+    }
+
+    int Term() {
+        PageNum = -1;
+        Eof = 1;
+        CurPage = nullptr;
+        return 0;
+    }
+
+    TDatPage* GotoPage(int pageno) {
+        PageNum = pageno - 1;
+        Eof = 0;
+        return Next();
+    }
+
+    int PageNum, Eof, Pages, Pagenum;
+    TDatPage* CurPage;
+};
+
+using TInputPageFile = TInputPageFileImpl<TInputFileManip>;
+
+template <class
TVal, + typename TBaseRecIter = TInputRecordIterator<TVal, TInputPageIterator<TInputPageFile>>> +class TInDatFileImpl: public TBaseRecIter { +public: + typedef TBaseRecIter TRecIter; + typedef typename TRecIter::TPageIter TPageIter; + typedef typename TRecIter::TPageIter::TReader TReader; + using TRecIter::GotoPage; + + int Open(const char* fname, size_t pages = 1, int pagesOrBytes = 1, ui32* gotRecordSig = nullptr, bool direct = false) { + int ret = TReader::Init(fname, TVal::RecordSig, gotRecordSig, direct); + return ret ? ret : Open2(pages, pagesOrBytes); + } + + int Open(const TFile& file, size_t pages = 1, int pagesOrBytes = 1, ui32* gotRecordSig = nullptr) { + int ret = TReader::Init(file, TVal::RecordSig, gotRecordSig); + return ret ? ret : Open2(pages, pagesOrBytes); + } + + int Open(TAutoPtr<IInputStream> input, size_t pages = 1, int pagesOrBytes = 1, ui32* gotRecordSig = nullptr) { + int ret = TReader::Init(input, TVal::RecordSig, gotRecordSig); + return ret ? ret : Open2(pages, pagesOrBytes); + } + + int Open(const TFile& file, const TDatMetaPage* meta, size_t pages = 1, int pagesOrBytes = 1) { + int ret = TReader::Init(file, meta, TVal::RecordSig); + return ret ? ret : Open2(pages, pagesOrBytes); + } + + int Close() { + int ret1 = TRecIter::Term(); + int ret2 = TPageIter::Term(); + int ret3 = TReader::Term(); + return ret1 ? ret1 : ret2 ? ret2 : ret3; + } + + const TVal* GotoLastPage() { + return TReader::GetLastPage() <= 0 ? 
nullptr : TRecIter::GotoPage(TReader::GetLastPage() - 1); + } + +private: + int Open2(size_t pages, int pagesOrBytes) { + int ret = TPageIter::Init(pages, pagesOrBytes); + if (!ret) + ret = TRecIter::Init(); + if (ret) + Close(); + return ret; + } +}; + +template <class TVal> +class TInIndexFile: protected TInDatFileImpl<TVal> { + typedef TInDatFileImpl<TVal> TDatFile; + typedef typename TDatFile::TRecIter TRecIter; + typedef typename TRecIter::TPageIter TPageIter; + typedef typename TExtInfoType<TVal>::TResult TExtInfo; + +public: + using TDatFile::IsOpen; + + TInIndexFile() + : Index0(nullptr) + { + } + + int Open(const char* fname, size_t pages = 2, int pagesOrBytes = 1, ui32* gotRecordSig = nullptr) { + int ret = TDatFile::Open(fname, pages, pagesOrBytes, gotRecordSig); + if (ret) + return ret; + if (!(Index0 = (TDatPage*)malloc(TPageIter::GetPageSize()))) { + TDatFile::Close(); + return MBDB_NO_MEMORY; + } + if (!TExtInfoType<TVal>::Exists && SizeOf((TVal*)nullptr)) + RecsOnPage = (TPageIter::GetPageSize() - sizeof(TDatPage)) / DatCeil(SizeOf((TVal*)nullptr)); + TDatFile::Next(); + memcpy(Index0, TPageIter::Current(), TPageIter::GetPageSize()); + return 0; + } + + int Close() { + free(Index0); + Index0 = nullptr; + return TDatFile::Close(); + } + + inline int GetError() const { + return TDatFile::GetError(); + } + + int FindKey(const TVal* akey, const TExtInfo* extInfo = nullptr) { + assert(IsOpen()); + if (TExtInfoType<TVal>::Exists || !SizeOf((TVal*)nullptr)) + return FindVszKey(akey, extInfo); + int num = FindKeyOnPage(Index0, akey); + TDatPage* page = TPageIter::GotoPage(num + 1); + if (!page) + return 0; + num = FindKeyOnPage(page, akey); + num += (TPageIter::GetPageNum() - 1) * RecsOnPage; + return num; + } + + int FindVszKey(const TVal* akey, const TExtInfo* extInfo = NULL) { + int num = FindVszKeyOnPage(Index0, akey, extInfo); + int num_add = 0; + for (int p = 0; p < num; p++) { + TDatPage* page = TPageIter::GotoPage(p + 1); + if (!page) + return 0; + 
num_add += page->RecNum; + } + TDatPage* page = TPageIter::GotoPage(num + 1); + if (!page) + return 0; + num = FindVszKeyOnPage(page, akey, extInfo); + num += num_add; + return num; + } + +protected: + int FindKeyOnPage(TDatPage* page, const TVal* key) { + int left = 0; + int right = page->RecNum - 1; + int recsize = DatCeil(SizeOf((TVal*)nullptr)); + while (left < right) { + int middle = (left + right) >> 1; + if (*((TVal*)((char*)page + sizeof(TDatPage) + middle * recsize)) < *key) + left = middle + 1; + else + right = middle; + } + //borders check (left and right) + return (left == 0 || *((TVal*)((char*)page + sizeof(TDatPage) + left * recsize)) < *key) ? left : left - 1; + } + + // will deserialize rawExtinfoA to extInfoA only if necessery + inline bool KeyLess_(const TVal* a, const TVal* b, + TExtInfo* extInfoA, const TExtInfo* extInfoB, + const ui8* rawExtInfoA, size_t rawLen) { + if (*a < *b) { + return true; + } else if (!extInfoB || *b < *a) { + return false; + } else { + // *a == *b && extInfoB + Y_PROTOBUF_SUPPRESS_NODISCARD extInfoA->ParseFromArray(rawExtInfoA, rawLen); + return (*extInfoA < *extInfoB); + } + } + + int FindVszKeyOnPage(TDatPage* page, const TVal* key, const TExtInfo* extInfo) { + TVal* cur = (TVal*)((char*)page + sizeof(TDatPage)); + ui32 recnum = page->RecNum; + if (!TExtInfoType<TVal>::Exists) { + for (; recnum > 0 && *cur < *key; --recnum) + cur = (TVal*)((char*)cur + DatCeil(SizeOf(cur))); + } else { + size_t ll; + size_t l; + size_t sz = NMicroBDB::SizeOfExt(cur, &ll, &l); + TExtInfo ei; + for (; recnum > 0 && KeyLess_(cur, key, &ei, extInfo, (ui8*)cur + sz + ll, l); --recnum) { + cur = (TVal*)((ui8*)cur + DatCeil(sz + ll + l)); + sz = NMicroBDB::SizeOfExt(cur, &ll, &l); + } + } + + int idx = page->RecNum - recnum - 1; + return (idx >= 0) ? 
idx : 0; + } + + TDatPage* Index0; + int RecsOnPage; +}; + +template <class TVal, class TKey, class TPageIterator = TInputPageIterator<TInputPageFile>> +class TKeyFileMixin: public TInDatFileImpl<TVal, TInputRecordIterator<TVal, TPageIterator>> { +protected: + TInIndexFile<TKey> KeyFile; +}; + +template <class TVal, class TKey, class TBase = TKeyFileMixin<TVal, TKey>> +class TDirectInDatFile: public TBase { + typedef TBase TDatFile; + typedef typename TDatFile::TRecIter TRecIter; + typedef typename TDatFile::TPageIter TPageIter; + +public: + void Open(const char* path, size_t pages = 1, size_t keypages = 1, int pagesOrBytes = 1) { + int ret; + ui32 gotRecordSig = 0; + + ret = TDatFile::Open(path, pages, pagesOrBytes, &gotRecordSig); + if (ret) { + ythrow yexception() << ErrorMessage(ret, "Failed to open input file", path, TVal::RecordSig, gotRecordSig); + } + char KeyName[PATH_MAX + 1]; + if (DatNameToIdx(KeyName, path)) { + ythrow yexception() << ErrorMessage(MBDB_BAD_FILENAME, "Failed to open input file", path); + } + gotRecordSig = 0; + ret = KeyFile.Open(KeyName, keypages, 1, &gotRecordSig); + if (ret) { + ythrow yexception() << ErrorMessage(ret, "Failed to open input keyfile", KeyName, TKey::RecordSig, gotRecordSig); + } + } + + void Close() { + int ret; + + if (TDatFile::IsOpen() && (ret = TDatFile::GetError())) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret, "Error before closing input file"); + if ((ret = TDatFile::Close())) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret, "Error while closing input file"); + + if (KeyFile.IsOpen() && (ret = KeyFile.GetError())) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret, "Error before closing input keyfile"); + if ((ret = KeyFile.Close())) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret, "Error while closing input keyfile"); + } + + const TVal* FindRecord(const TKey* key, const typename 
TExtInfoType<TKey>::TResult* extInfo = nullptr) { + int page = KeyFile.FindKey(key, extInfo); + const TVal* val = TRecIter::GotoPage(page); + if (!TExtInfoType<TVal>::Exists || !extInfo) { + TKey k; + while (val) { + TMakeExtKey<TVal, TKey>::Make(&k, nullptr, val, nullptr); + if (!(k < *key)) + break; + val = TRecIter::Next(); + } + } else { + typename TExtInfoType<TVal>::TResult valExt; + TKey k; + typename TExtInfoType<TKey>::TResult kExt; + while (val) { + TRecIter::GetExtInfo(&valExt); + TMakeExtKey<TVal, TKey>::Make(&k, &kExt, val, &valExt); + if (*key < k || !(k < *key) && !(kExt < *extInfo)) // k > *key || k == *key && kExt >= *extInfo + break; + val = TRecIter::Next(); + } + } + return val; + } + + int FindPagesNo(const TKey* key, const typename TExtInfoType<TVal>::TResult* extInfo = NULL) { + return KeyFile.FindKey(key, extInfo); + } + +protected: + using TBase::KeyFile; +}; diff --git a/library/cpp/microbdb/microbdb.cpp b/library/cpp/microbdb/microbdb.cpp new file mode 100644 index 0000000000..c10dbdf126 --- /dev/null +++ b/library/cpp/microbdb/microbdb.cpp @@ -0,0 +1 @@ +#include "microbdb.h" diff --git a/library/cpp/microbdb/microbdb.h b/library/cpp/microbdb/microbdb.h new file mode 100644 index 0000000000..7521887337 --- /dev/null +++ b/library/cpp/microbdb/microbdb.h @@ -0,0 +1,54 @@ +#pragma once + +#include <util/folder/dirut.h> + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4706) /*assignment within conditional expression*/ +#pragma warning(disable : 4267) /*conversion from 'size_t' to 'type', possible loss of data*/ +#endif + +#include "align.h" +#include "extinfo.h" +#include "header.h" +#include "reader.h" +#include "heap.h" +#include "file.h" +#include "sorter.h" +#include "input.h" +#include "output.h" +#include "sorterdef.h" + +inline int MakeSorterTempl(char path[/*FILENAME_MAX*/], const char* prefix) { + int ret = MakeTempDir(path, prefix); + if (!ret && strlcat(path, "%06d", FILENAME_MAX) > FILENAME_MAX - 100) + 
ret = EINVAL; + if (ret) + path[0] = 0; + return ret; +} + +inline int GetMeta(TFile& file, TDatMetaPage* meta) { + ui8 buf[METASIZE], *ptr = buf; + ssize_t size = sizeof(buf), ret; + while (size && (ret = file.Read(ptr, size)) > 0) { + size -= ret; + ptr += ret; + } + if (size) + return MBDB_BAD_FILE_SIZE; + ptr = buf; // gcc 4.4 warning fix + *meta = *(TDatMetaPage*)ptr; + return (meta->MetaSig == METASIG) ? 0 : MBDB_BAD_METAPAGE; +} + +template <class TRec> +inline bool IsDatFile(const char* fname) { + TDatMetaPage meta; + TFile f(fname, RdOnly); + return !GetMeta(f, &meta) && meta.RecordSig == TRec::RecordSig; +} + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif diff --git a/library/cpp/microbdb/noextinfo.proto b/library/cpp/microbdb/noextinfo.proto new file mode 100644 index 0000000000..6a78882e07 --- /dev/null +++ b/library/cpp/microbdb/noextinfo.proto @@ -0,0 +1,4 @@ + +message TNoExtInfo { +} + diff --git a/library/cpp/microbdb/output.h b/library/cpp/microbdb/output.h new file mode 100644 index 0000000000..d0ecab2108 --- /dev/null +++ b/library/cpp/microbdb/output.h @@ -0,0 +1,1049 @@ +#pragma once + +#include "header.h" +#include "file.h" + +#include <util/generic/buffer.h> +#include <util/memory/tempbuf.h> + +#include <sys/uio.h> + +template <class TFileManip> +inline ssize_t Writev(TFileManip& fileManip, const struct iovec* iov, int iovcnt) { + ssize_t written_count = 0; + for (int n = 0; n < iovcnt; n++) { + ssize_t last_write = fileManip.Write(iov[n].iov_base, iov[n].iov_len); + if (last_write < 0) + return -1; + written_count += last_write; + } + return written_count; +} + +//********************************************************************* +struct TFakeIndexer { + inline void NextPage(TDatPage*) noexcept { + } +}; + +struct TCallbackIndexer { + typedef void (*TCallback)(void* This, const TDatPage* page); + + TCallbackIndexer() { + Callback = nullptr; + } + + void SetCallback(void* t, TCallback c) { + This = t; + Callback = c; + } + + void 
NextPage(TDatPage* dat) { + Callback(This, dat); + } + + TCallback Callback; + void* This; +}; + +template <class TVal, typename TBasePageIter, typename TBaseIndexer = TFakeIndexer, typename TCompressor = TFakeCompression> +class TOutputRecordIterator; + +template <class TVal, typename TBasePageIter, typename TBaseIndexer> +class TOutputRecordIterator<TVal, TBasePageIter, TBaseIndexer, TFakeCompression> + : public TBasePageIter, public TBaseIndexer { +public: + enum EOffset { + WrongOffset = size_t(-1) + }; + + typedef TBasePageIter TPageIter; + typedef TBaseIndexer TIndexer; + + TOutputRecordIterator() { + Clear(); + } + + ~TOutputRecordIterator() { + Term(); + } + + inline const TVal* Current() const { + return Rec; + } + + const TVal* Push(const TVal* v, const typename TExtInfoType<TVal>::TResult* extInfo = nullptr) { + NMicroBDB::AssertValid(v); + size_t len = SizeOf(v); + if (!TExtInfoType<TVal>::Exists) + return (Reserve(len)) ? (TVal*)memcpy(Rec, v, len) : nullptr; + else if (extInfo) { + size_t extSize = extInfo->ByteSize(); + size_t extLenSize = len_long((i64)extSize); + if (!Reserve(len + extLenSize + extSize)) + return nullptr; + memcpy(Rec, v, len); + out_long((i64)extSize, (char*)Rec + len); + extInfo->SerializeWithCachedSizesToArray((ui8*)Rec + len + extLenSize); + return Rec; + } else { + size_t extLenSize = len_long((i64)0); + if (!Reserve(len + extLenSize)) + return nullptr; + memcpy(Rec, v, len); + out_long((i64)0, (char*)Rec + len); + return Rec; + } + } + + const TVal* Push(const TVal* v, const ui8* extInfoRaw, size_t extLen) { + NMicroBDB::AssertValid(v); + size_t sz = SizeOf(v); + if (!Reserve(sz + extLen)) + return nullptr; + memcpy(Rec, v, sz); + memcpy((ui8*)Rec + sz, extInfoRaw, extLen); + return Rec; + } + + // use values stored in microbdb readers/writers internal buffer only. 
+ // method expects serialized extInfo after this record + const TVal* PushWithExtInfo(const TVal* v) { + NMicroBDB::AssertValid(v); + size_t extSize; + size_t extLenSize; + size_t sz = NMicroBDB::SizeOfExt(v, &extLenSize, &extSize); + sz += extLenSize + extSize; + if (!Reserve(sz)) + return nullptr; + memcpy(Rec, v, sz); + return Rec; + } + + TVal* Reserve(size_t len) { + if (CurLen + DatCeil(len) > TPageIter::GetPageSize()) { + if (sizeof(TDatPage) + DatCeil(len) > TPageIter::GetPageSize()) + return Rec = nullptr; + if (TPageIter::Current() && RecNum) { + TPageIter::Current()->RecNum = RecNum; + TPageIter::Current()->Format = MBDB_FORMAT_RAW; + memset((char*)TPageIter::Current() + CurLen, 0, TPageIter::GetPageSize() - CurLen); + TIndexer::NextPage(TPageIter::Current()); + RecNum = 0; + } + if (!TPageIter::Next()) { + CurLen = TPageIter::GetPageSize(); + return Rec = nullptr; + } + CurLen = sizeof(TDatPage); + } + LenForOffset = CurLen; + Rec = (TVal*)((char*)TPageIter::Current() + CurLen); + DatSet(Rec, len); + + CurLen += DatCeil(len); + + ++RecNum; + return Rec; + } + + void Flush() { + TPageIter::Current()->RecNum = RecNum; + TPageIter::Current()->Format = MBDB_FORMAT_RAW; + } + + size_t Offset() const { + return Rec ? 
TPageIter::Offset() + LenForOffset : WrongOffset; + } + + void ResetDat() { + CurLen = (char*)Rec - (char*)TPageIter::Current(); + size_t len; + if (!TExtInfoType<TVal>::Exists) { + len = SizeOf(Rec); + } else { + size_t ll; + size_t l; + len = NMicroBDB::SizeOfExt(Rec, &ll, &l); + len += ll + l; + } + CurLen += DatCeil(len); + } + +protected: + void Clear() { + Rec = nullptr; + RecNum = 0; + CurLen = 0; + LenForOffset = 0; + } + + int Init() { + Clear(); + CurLen = TPageIter::GetPageSize(); + return 0; + } + + int Term() { + if (TPageIter::Current()) { + TPageIter::Current()->RecNum = RecNum; + TPageIter::Current()->Format = MBDB_FORMAT_RAW; + memset((char*)TPageIter::Current() + CurLen, 0, TPageIter::GetPageSize() - CurLen); + RecNum = 0; + } + int ret = !TPageIter::Current() && RecNum; + Clear(); + return ret; + } + + int GotoPage(int pageno) { + if (TPageIter::Current()) { + TPageIter::Current()->RecNum = RecNum; + TPageIter::Current()->Format = MBDB_FORMAT_RAW; + memset((char*)TPageIter::Current() + CurLen, 0, TPageIter::GetPageSize() - CurLen); + } + int ret = TPageIter::GotoPage(pageno); + if (!ret) { + RecNum = 0; + CurLen = sizeof(TDatPage); + } + return ret; + } + + TVal* Rec; + int RecNum; + size_t CurLen; + size_t LenForOffset; +}; + +template <class TVal, typename TBasePageIter, typename TBaseIndexer, typename TAlgorithm> +class TOutputRecordIterator + : public TBasePageIter, + public TBaseIndexer, + private TAlgorithm { + class TPageBuffer { + public: + void Init(size_t page) { + Pos = 0; + RecNum = 0; + Size = Min(page / 2, size_t(64 << 10)); + Data.Reset(new ui8[Size]); + } + + void Clear() { + Pos = 0; + RecNum = 0; + } + + inline bool Empty() const { + return RecNum == 0; + } + + public: + size_t Size; + size_t Pos; + int RecNum; + TArrayHolder<ui8> Data; + }; + +public: + typedef TBasePageIter TPageIter; + typedef TBaseIndexer TIndexer; + + TOutputRecordIterator() + : Rec(nullptr) + , RecNum(0) + { + } + + ~TOutputRecordIterator() { + Term(); + } 
+ + const TVal* Current() const { + return Rec; + } + + const TVal* Push(const TVal* v, const typename TExtInfoType<TVal>::TResult* extInfo = nullptr) { + NMicroBDB::AssertValid(v); + size_t len = SizeOf(v); + if (!TExtInfoType<TVal>::Exists) + return (Reserve(len)) ? (TVal*)memcpy((TVal*)Rec, v, len) : nullptr; + else if (extInfo) { + size_t extSize = extInfo->ByteSize(); + size_t extLenSize = len_long((i64)extSize); + if (!Reserve(len + extLenSize + extSize)) + return nullptr; + memcpy(Rec, v, len); + out_long((i64)extSize, (char*)Rec + len); + extInfo->SerializeWithCachedSizesToArray((ui8*)Rec + len + extLenSize); + return Rec; + } else { + size_t extLenSize = len_long((i64)0); + if (!Reserve(len + extLenSize)) + return nullptr; + memcpy(Rec, v, len); + out_long((i64)0, (char*)Rec + len); + return Rec; + } + } + + const TVal* Push(const TVal* v, const ui8* extInfoRaw, size_t extLen) { + NMicroBDB::AssertValid(v); + size_t sz = SizeOf(v); + if (!Reserve(sz + extLen)) + return NULL; + memcpy(Rec, v, sz); + memcpy((ui8*)Rec + sz, extInfoRaw, extLen); + return Rec; + } + + // use values stored in microbdb readers/writers internal buffer only. 
+ // method expects serialized extInfo after this record + const TVal* PushWithExtInfo(const TVal* v) { + NMicroBDB::AssertValid(v); + size_t extSize; + size_t extLenSize; + size_t sz = NMicroBDB::SizeOfExt(v, &extLenSize, &extSize); + sz += extLenSize + extSize; + if (!Reserve(sz)) + return nullptr; + memcpy(Rec, v, sz); + return Rec; + } + + TVal* Reserve(const size_t len) { + const size_t aligned = DatCeil(len); + + if (!TPageIter::Current()) { // Allocate fist page + if (!TPageIter::Next()) { + CurLen = TPageIter::GetPageSize(); + return Rec = nullptr; + } + CurLen = sizeof(TDatPage) + sizeof(TCompressedPage); + } + + if (Buffer.Pos + aligned > Buffer.Size) { + if (Buffer.Pos == 0) + return Rec = nullptr; + if (FlushBuffer()) + return Rec = nullptr; + if (Buffer.Pos + aligned + sizeof(TDatPage) + sizeof(TCompressedPage) > Buffer.Size) + return Rec = nullptr; + } + + Rec = (TVal*)((char*)Buffer.Data.Get() + Buffer.Pos); + DatSet(Rec, len); // len is correct because DatSet set align tail to zero + + Buffer.RecNum++; + Buffer.Pos += aligned; + ++RecNum; + return Rec; + } + + void Flush() { + if (!Buffer.Empty()) { + FlushBuffer(); + TPageIter::Current()->RecNum = RecNum; + TPageIter::Current()->Format = MBDB_FORMAT_COMPRESSED; + } + } + + size_t Offset() const { + // According to vadya@ there is no evil to return 0 all the time + return 0; + } + + void ResetDat() { + Buffer.Pos = (char*)Rec - (char*)Buffer.Data.Get(); + size_t len = SizeOf(Rec); + Buffer.Pos += DatCeil(len); + } + +protected: + void Clear() { + RecNum = 0; + Rec = nullptr; + Count = 0; + CurLen = sizeof(TDatPage) + sizeof(TCompressedPage); + Buffer.Clear(); + } + + int Init() { + Clear(); + Buffer.Init(TPageIter::GetPageSize()); + TAlgorithm::Init(); + return 0; + } + + int Term() { + if (TPageIter::Current()) + Commit(); + int ret = !TPageIter::Current() && RecNum; + Clear(); + TAlgorithm::Term(); + return ret; + } + + int GotoPage(int pageno) { + if (TPageIter::Current()) + Commit(); + int ret = 
TPageIter::GotoPage(pageno); + if (!ret) + Reset(); + return ret; + } + +private: + void Commit() { + Flush(); + TPageIter::Current()->RecNum = RecNum; + TPageIter::Current()->Format = MBDB_FORMAT_COMPRESSED; + SetCompressedPageHeader(); + + memset((char*)TPageIter::Current() + CurLen, 0, TPageIter::GetPageSize() - CurLen); + RecNum = 0; + Count = 0; + } + + inline void SetCompressedPageHeader() { + TCompressedPage* const hdr = (TCompressedPage*)((ui8*)TPageIter::Current() + sizeof(TDatPage)); + + hdr->BlockCount = Count; + hdr->Algorithm = TAlgorithm::Code; + hdr->Version = 0; + hdr->Reserved = 0; + } + + inline void Reset() { + RecNum = 0; + CurLen = sizeof(TDatPage) + sizeof(TCompressedPage); + Count = 0; + Buffer.Clear(); + } + + int FlushBuffer() { + TArrayHolder<ui8> data; + const ui8* const buf = Buffer.Data.Get(); + size_t first = 0; + + if (!TExtInfoType<TVal>::Exists) + first = DatCeil(SizeOf((TVal*)buf)); + else { + size_t ll; + size_t l; + first = NMicroBDB::SizeOfExt((const TVal*)buf, &ll, &l); + first = DatCeil(first + ll + l); + } + + size_t total = sizeof(NMicroBDB::TCompressedHeader) + first + ((Buffer.RecNum == 1) ? 0 : TAlgorithm::CompressBound(Buffer.Pos - first)); + size_t real = total; + + { + ui8* p = nullptr; + NMicroBDB::TCompressedHeader* hdr = nullptr; + + // 1. Choose data destination (temporary buffer or dat-page) + if (CurLen + total > TPageIter::GetPageSize()) { + data.Reset(new ui8[total]); + + hdr = (NMicroBDB::TCompressedHeader*)data.Get(); + p = data.Get() + sizeof(NMicroBDB::TCompressedHeader); + } else { + p = (ui8*)TPageIter::Current() + CurLen; + hdr = (NMicroBDB::TCompressedHeader*)p; + p += sizeof(NMicroBDB::TCompressedHeader); + } + + // 2. 
Compress data + + // Fill header and first record + hdr->Original = Buffer.Pos; + hdr->Compressed = 0; + hdr->Count = Buffer.RecNum; + hdr->Reserved = 0; + memcpy(p, Buffer.Data.Get(), first); + // Fill compressed part + if (Buffer.RecNum > 1) { + size_t size = TAlgorithm::CompressBound(Buffer.Pos - first); + + p += first; + TAlgorithm::Compress(p, size, buf + first, Buffer.Pos - first); + + hdr->Compressed = size; + + real = sizeof(NMicroBDB::TCompressedHeader) + first + size; + } + } + + Y_ASSERT(sizeof(TDatPage) + sizeof(TCompressedPage) + real <= TPageIter::GetPageSize()); + + // 3. Check page capacity + + if (CurLen + real > TPageIter::GetPageSize()) { + Y_ASSERT(data.Get() != nullptr); + + if (TPageIter::Current() && RecNum) { + RecNum = RecNum - Buffer.RecNum; + TPageIter::Current()->RecNum = RecNum; + TPageIter::Current()->Format = MBDB_FORMAT_COMPRESSED; + SetCompressedPageHeader(); + memset((char*)TPageIter::Current() + CurLen, 0, TPageIter::GetPageSize() - CurLen); + TIndexer::NextPage(TPageIter::Current()); + RecNum = Buffer.RecNum; + Count = 0; + } + if (!TPageIter::Next()) { + CurLen = TPageIter::GetPageSize(); + return MBDB_NO_MEMORY; + } + CurLen = sizeof(TDatPage) + sizeof(TCompressedPage); + } + + // 4. Flush data and reset buffer state + + if (data.Get()) + memcpy((ui8*)TPageIter::Current() + CurLen, data.Get(), real); + CurLen += real; + ++Count; + Buffer.Clear(); + return 0; + } + +private: + size_t CurLen; + TPageBuffer Buffer; + TVal* Rec; + ui32 Count; //! 
< count of compressed blocks on page +public: + int RecNum; +}; + +template <typename TBaseWriter> +class TOutputPageIterator: public TBaseWriter { +public: + typedef TBaseWriter TWriter; + + TOutputPageIterator() + : Buf(nullptr) + { + Clear(); + } + + ~TOutputPageIterator() { + Term(); + } + + TDatPage* Current() { + return CurPage; + } + + size_t Offset() const { + //Cout << "PS = " << TWriter::GetPageSize() << "; PN = " << PageNum << "; MS = " << METASIZE << Endl; + return TWriter::GetPageSize() * PageNum + METASIZE; + } + + int Freeze() { + return (Frozen = (PageNum == -1) ? 0 : (int)PageNum); + } + + void Unfreeze() { + Frozen = -1; + } + + inline int IsFrozen() const { + return Frozen + 1; + } + + inline size_t GetPageSize() const { + return TWriter::GetPageSize(); + } + + inline int GetPageNum() const { + return (int)PageNum; + } + + TDatPage* Next() { + if (PageNum >= Maxpage && WriteBuf()) + return CurPage = nullptr; + CurPage = (TDatPage*)(Buf + ((++PageNum) % Bufpages) * GetPageSize()); + memset(CurPage, 0, sizeof(TDatPage)); + return CurPage; + } + +protected: + int Init(size_t pages, int pagesOrBytes) { + Term(); + if (pagesOrBytes) + Bufpages = pages; + else + Bufpages = pages / GetPageSize(); + Bufpages = Max<size_t>(1, Bufpages); + Maxpage = Bufpages - 1; + // if (!(Buf = (char*)malloc(Bufpages * GetPageSize()))) + // return ENOMEM; + ABuf.Alloc(Bufpages * GetPageSize()); + Buf = ABuf.Begin(); + if (TWriter::Memo) + Freeze(); + return 0; + } + + int Term() { + Unfreeze(); + int ret = (PageNum < 0) ? 
0 : WriteBuf();
+        Clear();
+        return ret;
+    }
+
+    // Writes out pending pages and repositions the writer at page `pageno`.
+    // Returns EAGAIN while frozen, otherwise the status of WriteBuf()/TWriter::GotoPage().
+    int GotoPage(int pageno) {
+        int ret = EAGAIN;
+        if (IsFrozen() || PageNum >= 0 && ((ret = WriteBuf())) || ((ret = TWriter::GotoPage(pageno))))
+            return ret;
+        PageNum = pageno;
+        Maxpage = Bufpages - 1 + pageno;
+        CurPage = (TDatPage*)(Buf + (PageNum % Bufpages) * GetPageSize());
+        memset(CurPage, 0, sizeof(TDatPage));
+        return 0;
+    }
+
+    void Clear() {
+        ABuf.Dealloc();
+        Buf = nullptr;
+        Maxpage = PageNum = Frozen = -1;
+        Bufpages = 0;
+        CurPage = nullptr;
+    }
+
+    // Writes the filled pages of the ring buffer (up to the frozen page, if any).
+    // Returns 0 on success, EAGAIN if there is nothing that may be written, EIO on write failure.
+    int WriteBuf() {
+        int nvec;
+        iovec vec[2];
+        ssize_t minpage = Maxpage - Bufpages + 1;
+        ssize_t maxpage = Frozen == -1 ? PageNum : Frozen - 1;
+        if (maxpage < minpage)
+            return EAGAIN;
+        minpage %= Bufpages;
+        maxpage %= Bufpages;
+        if (maxpage < minpage) {
+            // The range wraps around the end of the ring: use two vectors.
+            vec[0].iov_base = Buf + GetPageSize() * minpage;
+            vec[0].iov_len = GetPageSize() * (Bufpages - minpage);
+            vec[1].iov_base = Buf;
+            vec[1].iov_len = GetPageSize() * (maxpage + 1);
+            nvec = 2;
+        } else {
+            vec[0].iov_base = Buf + GetPageSize() * minpage;
+            vec[0].iov_len = GetPageSize() * (maxpage - minpage + 1);
+            nvec = 1;
+        }
+        if (TWriter::WritePages(vec, nvec))
+            return EIO;
+        Maxpage += (maxpage < minpage) ? (Bufpages - minpage + maxpage + 1) : (maxpage - minpage + 1);
+        return 0;
+    }
+
+    ssize_t Maxpage;
+    ssize_t Bufpages;
+    ssize_t PageNum;
+    int Frozen;
+    TDatPage* CurPage;
+    char* Buf;
+    TMappedAllocation ABuf;
+};
+
+// Page-level writer of a dat-file: writes the metapage on Init() and whole pages afterwards.
+template <class TFileManip>
+class TOutputPageFileImpl: private TNonCopyable {
+public:
+    TOutputPageFileImpl()
+        : Pagesize(0)
+        , Eof(1)
+        , Error(0)
+        , Memo(0)
+        , Recordsig(0)
+    {
+    }
+
+    ~TOutputPageFileImpl() {
+        Term();
+    }
+
+    inline int IsEof() const {
+        return Eof;
+    }
+
+    inline int GetError() const {
+        return Error;
+    }
+
+    inline bool IsOpen() const {
+        return FileManip.IsOpen();
+    }
+
+    inline size_t GetPageSize() const {
+        return Pagesize;
+    }
+
+    inline ui32 GetRecordSig() const {
+        return Recordsig;
+    }
+
+    // Creates `fname` and writes the metapage. A null `fname` selects the mode in which
+    // Memo is set and no file is opened. On failure the file is closed and removed.
+    int Init(const char* fname, size_t pagesize, ui32 recsig, bool direct = false) {
+        Memo = 0;
+        if (FileManip.IsOpen())
+            return MBDB_ALREADY_INITIALIZED;
+
+        if (!fname) {
+            Eof = Error = 0;
+            Pagesize = pagesize;
+            Recordsig = recsig;
+            Memo = 1;
+            return 0;
+        }
+
+        Error = FileManip.Open(fname, WrOnly | CreateAlways | ARW | AWOther | (direct ? DirectAligned : EOpenMode()));
+        if (Error)
+            return Error;
+        Error = Init(TFile(), pagesize, recsig);
+        if (Error) {
+            FileManip.Close();
+            unlink(fname);
+        }
+        return Error;
+    }
+
+    // Same as above for an output stream; a null stream selects the Memo mode.
+    int Init(TAutoPtr<IOutputStream> output, size_t pagesize, ui32 recsig) {
+        Memo = 0;
+        if (FileManip.IsOpen()) {
+            return MBDB_ALREADY_INITIALIZED;
+        }
+
+        if (!output) {
+            Eof = Error = 0;
+            Pagesize = pagesize;
+            Recordsig = recsig;
+            Memo = 1;
+            return 0;
+        }
+
+        Error = FileManip.Open(output);
+        if (Error)
+            return Error;
+        Error = Init(TFile(), pagesize, recsig);
+        if (Error) {
+            FileManip.Close();
+        }
+        return Error;
+    }
+
+    // Attaches `file` (unless FileManip is already open) and writes the metapage.
+    // Returns 0 on success; on a failed write returns errno, or MBDB_WRITE_ERROR if errno is 0.
+    int Init(const TFile& file, size_t pagesize, ui32 recsig) {
+        Memo = 0;
+        if (!file.IsOpen() && !FileManip.IsOpen())
+            return MBDB_NOT_INITIALIZED;
+        if (file.IsOpen() && FileManip.IsOpen())
+            return MBDB_ALREADY_INITIALIZED;
+        if (file.IsOpen()) {
+            Error = FileManip.Init(file);
+            if (Error)
+                return Error;
+        }
+
+        Eof = 1;
+        TTempBuf buf(METASIZE + FS_BLOCK_SIZE);
+        // Start of the metapage inside buf, aligned to FS_BLOCK_SIZE.
+        const char* ptr = (buf.Data() + FS_BLOCK_SIZE - ((ui64)buf.Data() & (FS_BLOCK_SIZE - 1)));
+        TDatMetaPage* meta = (TDatMetaPage*)ptr;
+
+        memset(buf.Data(), 0, buf.Size());
+        meta->MetaSig = METASIG;
+        meta->PageSize = Pagesize = pagesize;
+        meta->RecordSig = Recordsig = recsig;
+
+        ssize_t size = METASIZE, ret = 0;
+        while (size && (ret = FileManip.Write(ptr, (unsigned)size)) > 0) {
+            size -= ret;
+            ptr += ret;
+        }
+        if (size || ret <= 0) {
+            // Save errno before cleanup: Term() is defined outside the visible part
+            // of the class and may overwrite it.
+            const int savedErrno = errno;
+            Term();
+            return Error = savedErrno ? savedErrno : MBDB_WRITE_ERROR;
+        }
+
+        Error = Eof = 0;
+        return Error;
+    }
+
+protected:
+    int WritePages(iovec* vec, int nvec) {
+        if (Error || Memo)
+            return Error;
+
+        ssize_t size, delta;
+        iovec* pvec;
+        int vsize;
+
+        for (vsize = 0, pvec = vec; vsize < nvec; vsize++, pvec++)
+            for (size = 0; (size_t)size < pvec->iov_len; size += Pagesize)
+                ((TDatPage*)((char*)pvec->iov_base + size))->PageSig = PAGESIG;
+
+        delta = size = 0;
+        pvec = vec;
+        vsize = nvec;
+        while (vsize && (size = Writev(FileManip, pvec, (int)Min(vsize, 16))) > 0) {
+            if (delta) {
+                size += delta;
+                pvec->iov_len += delta;
+                pvec->iov_base = (char*)pvec->iov_base - delta;
+                delta = 0;
+            }
+            while (size) {
+                if ((size_t)size >= pvec->iov_len) {
+                    size -= pvec->iov_len;
+                    ++pvec;
+                    --vsize;
+                } else {
+                    delta = size;
+                    pvec->iov_len -= size;
+                    pvec->iov_base = (char*)pvec->iov_base + size;
+                    size = 0;
+                }
+            }
+        }
+        if (delta) {
+            pvec->iov_len += delta;
+            pvec->iov_base = (char*)pvec->iov_base - delta;
+        }
+        return Error = (!size && !vsize) ? 0 : errno ?
errno : MBDB_WRITE_ERROR; + } + + i64 Tell() { + return FileManip.RealSeek(0, SEEK_CUR); + } + + int GotoPage(int pageno) { + if (Error || Memo) + return Error; + Eof = 0; + i64 offset = (i64)pageno * Pagesize + METASIZE; + if (offset != FileManip.Seek(offset, SEEK_SET)) + Error = MBDB_BAD_FILE_SIZE; + return Error; + } + + int Term() { + int ret = FileManip.Close(); + Eof = 1; + Memo = 0; + if (!Error) + Error = ret; + return Error; + } + + size_t Pagesize; + int Eof; + int Error; + int Memo; + ui32 Recordsig; + +private: + TFileManip FileManip; +}; + +using TOutputPageFile = TOutputPageFileImpl<TOutputFileManip>; + +template <class TVal, + typename TBaseRecIter = TOutputRecordIterator<TVal, TOutputPageIterator<TOutputPageFile>>> +class TOutDatFileImpl: public TBaseRecIter { +public: + typedef TBaseRecIter TRecIter; + typedef typename TRecIter::TPageIter TPageIter; + typedef typename TRecIter::TPageIter::TWriter TWriter; + + int Open(const char* fname, size_t pagesize, size_t pages = 1, int pagesOrBytes = 1, bool direct = false) { + int ret = TWriter::Init(fname, pagesize, TVal::RecordSig, direct); + return ret ? ret : Open2(pages, pagesOrBytes); + } + + int Open(const TFile& file, size_t pagesize, size_t pages = 1, int pagesOrBytes = 1) { + int ret = TWriter::Init(file, pagesize, TVal::RecordSig); + return ret ? ret : Open2(pages, pagesOrBytes); + } + + int Open(TAutoPtr<IOutputStream> output, size_t pagesize, size_t pages = 1, int pagesOrBytes = 1) { + int ret = TWriter::Init(output, pagesize, TVal::RecordSig); + return ret ? ret : Open2(pages, pagesOrBytes); + } + + int Close() { + int ret1 = TRecIter::Term(); + int ret2 = TPageIter::Term(); + int ret3 = TWriter::Term(); + return ret1 ? ret1 : ret2 ? 
ret2 : ret3; + } + +private: + int Open2(size_t pages, int pagesOrBytes) { + int ret = TPageIter::Init(pages, pagesOrBytes); + if (!ret) + ret = TRecIter::Init(); + if (ret) + Close(); + return ret; + } +}; + +template <class TVal> +class TOutIndexFile: public TOutDatFileImpl< + TVal, + TOutputRecordIterator<TVal, TOutputPageIterator<TOutputPageFile>, TCallbackIndexer, TFakeCompression>> { + typedef TOutDatFileImpl< + TVal, + TOutputRecordIterator<TVal, TOutputPageIterator<TOutputPageFile>, TCallbackIndexer, TFakeCompression>> + TDatFile; + typedef TOutIndexFile<TVal> TMyType; + typedef typename TDatFile::TRecIter TRecIter; + typedef typename TRecIter::TPageIter TPageIter; + typedef typename TRecIter::TIndexer TIndexer; + +public: + TOutIndexFile() { + TIndexer::SetCallback(this, DispatchCallback); + } + + int Open(const char* fname, size_t pagesize, size_t pages, int pagesOrBytes = 1) { + int ret = TDatFile::Open(fname, pagesize, pages, pagesOrBytes); + if (ret) + return ret; + if ((ret = TRecIter::GotoPage(1))) { + TDatFile::Close(); + return ret; + } + Index0.Clear(); + return ret; + } + + int Close() { + TPageIter::Unfreeze(); + if (TRecIter::RecNum) { + TRecIter::Flush(); + NextPage(TPageIter::Current()); + } + int ret = 0; + if (Index0.Size() && !(ret = TRecIter::GotoPage(0))) { + const char* ptr = Index0.Begin(); + size_t recSize; + while (ptr < Index0.End()) { + Y_ASSERT((size_t)(Index0.End() - ptr) >= sizeof(size_t)); + memcpy(&recSize, ptr, sizeof(size_t)); + ptr += sizeof(size_t); + Y_ASSERT((size_t)(Index0.End() - ptr) >= recSize); + ui8* buf = (ui8*)TRecIter::Reserve(recSize); + if (!buf) { + ret = MBDB_PAGE_OVERFLOW; + break; + } + memcpy(buf, ptr, recSize); + TRecIter::ResetDat(); + ptr += recSize; + } + Index0.Clear(); + ret = (TPageIter::GetPageNum() != 0) ? MBDB_PAGE_OVERFLOW : TPageIter::GetError(); + } + int ret1 = TDatFile::Close(); + return ret ? 
ret : ret1; + } + +protected: + TBuffer Index0; + + void NextPage(const TDatPage* page) { + const TVal* first = (const TVal*)NMicroBDB::GetFirstRecord(page); + size_t sz; + if (!TExtInfoType<TVal>::Exists) { + sz = SizeOf(first); + } else { + size_t ll; + size_t l; + sz = NMicroBDB::SizeOfExt(first, &ll, &l); + sz += ll + l; + } + Index0.Append((const char*)&sz, sizeof(size_t)); + Index0.Append((const char*)first, sz); + } + + static void DispatchCallback(void* This, const TDatPage* page) { + ((TMyType*)This)->NextPage(page); + } +}; + +template <class TVal, class TKey, typename TCompressor = TFakeCompression, class TPageFile = TOutputPageFile> +class TOutDirectFileImpl: public TOutDatFileImpl< + TVal, + TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TCallbackIndexer, TCompressor>> { + typedef TOutDatFileImpl< + TVal, + TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TCallbackIndexer, TCompressor>> + TDatFile; + typedef TOutDirectFileImpl<TVal, TKey, TCompressor, TPageFile> TMyType; + typedef typename TDatFile::TRecIter TRecIter; + typedef typename TRecIter::TPageIter TPageIter; + typedef typename TRecIter::TIndexer TIndexer; + typedef TOutIndexFile<TKey> TKeyFile; + +public: + TOutDirectFileImpl() { + TIndexer::SetCallback(this, DispatchCallback); + } + + int Open(const char* fname, size_t pagesize, int pages = 1, size_t ipagesize = 0, size_t ipages = 1, int pagesOrBytes = 1) { + char iname[FILENAME_MAX]; + int ret; + if (ipagesize == 0) + ipagesize = pagesize; + ret = TDatFile::Open(fname, pagesize, pages, pagesOrBytes); + ret = ret ? ret : DatNameToIdx(iname, fname); + ret = ret ? ret : KeyFile.Open(iname, ipagesize, ipages, pagesOrBytes); + if (ret) + TDatFile::Close(); + return ret; + } + + int Close() { + if (TRecIter::RecNum) { + TRecIter::Flush(); + NextPage(TPageIter::Current()); + } + int ret = KeyFile.Close(); + int ret1 = TDatFile::Close(); + return ret1 ? 
ret1 : ret; + } + + int GetError() const { + return TDatFile::GetError() ? TDatFile::GetError() : KeyFile.GetError(); + } + +protected: + TKeyFile KeyFile; + + void NextPage(const TDatPage* page) { + typedef TMakeExtKey<TVal, TKey> TMakeExtKey; + + TVal* val = (TVal*)NMicroBDB::GetFirstRecord(page); + TKey key; + if (!TMakeExtKey::Exists) { + TMakeExtKey::Make(&key, nullptr, val, nullptr); + KeyFile.Push(&key); + } else { + size_t ll; + size_t l; + size_t sz = NMicroBDB::SizeOfExt(val, &ll, &l); + typename TExtInfoType<TVal>::TResult valExt; + if (TExtInfoType<TVal>::Exists) + Y_PROTOBUF_SUPPRESS_NODISCARD valExt.ParseFromArray((ui8*)val + sz + ll, l); + typename TExtInfoType<TKey>::TResult keyExt; + TMakeExtKey::Make(&key, &keyExt, val, &valExt); + KeyFile.Push(&key, &keyExt); + } + } + + static void DispatchCallback(void* This, const TDatPage* page) { + ((TMyType*)This)->NextPage(page); + } +}; diff --git a/library/cpp/microbdb/powersorter.h b/library/cpp/microbdb/powersorter.h new file mode 100644 index 0000000000..c40de9c23f --- /dev/null +++ b/library/cpp/microbdb/powersorter.h @@ -0,0 +1,667 @@ +#pragma once + +#include "safeopen.h" + +#include <util/generic/vector.h> +#include <util/generic/deque.h> +#include <util/system/mutex.h> +#include <util/system/condvar.h> +#include <util/thread/pool.h> + +template < + class TRecord, + template <typename T> class TCompare, + class TSieve, + class TMemoFile = TOutDatFile<TRecord>> +class TDatSorterBuf { +public: + typedef TRecord TRec; + typedef TVector<TRec*> TVectorType; + typedef TMemoFile TMemo; + typedef TCompare<TRecord> TComp; + +public: + TDatSorterBuf(size_t memory, size_t pageSize) + : Memo("memo", pageSize, memory, 0) + , Cur() + { + Memo.Open(nullptr); + Memo.Freeze(); + } + + ~TDatSorterBuf() { + Vector.clear(); + Memo.Close(); + } + + const TRec* Push(const TRec* v) { + const TRec* u = Memo.Push(v); + if (u) + Vector.push_back((TRec*)u); + return u; + } + + const TRec* Next() { + if (Ptr == Vector.end()) 
{ + if (Cur) + TSieve::Sieve(Cur, Cur); + Cur = nullptr; + } else { + Cur = *Ptr++; + if (!TIsSieveFake<TSieve>::Result) + while (Ptr != Vector.end() && TSieve::Sieve(Cur, *Ptr)) + ++Ptr; + } + return Cur; + } + + const TRec* Current() { + return Cur; + } + + size_t Size() { + return Vector.size(); + } + + void Sort() { + Ptr = Vector.begin(); + Cur = nullptr; + + MBDB_SORT_FUN(Vector.begin(), Vector.end(), TComp()); + } + + void Clear() { + Vector.clear(); + Memo.Freeze(); + Ptr = Vector.begin(); + Cur = nullptr; + } + +private: + TVectorType Vector; + TMemo Memo; + + typename TVectorType::iterator + Ptr; + TRec* Cur; +}; + +template < + class TRecord, + class TInput, + template <typename T> class TCompare, + class TSieve> +class TDatMerger { +public: + typedef TRecord TRec; + typedef TCompare<TRecord> TComp; + typedef TSimpleSharedPtr<TInput> TInputPtr; + typedef TVector<TInputPtr> TInputVector; + +public: + ~TDatMerger() { + Close(); + } + + void Init(const TInputVector& inputs) { + Inputs = inputs; + TVector<TInput*> v; + for (int i = 0; i < Inputs.ysize(); ++i) + v.push_back(Inputs[i].Get()); + HeapIter.Init(&v[0], v.size()); + if (!TIsSieveFake<TSieve>::Result) + PNext = HeapIter.Next(); + } + + const TRec* Next() { + if (TIsSieveFake<TSieve>::Result) { + return HeapIter.Next(); + } + + if (!PNext) { + if (PCur) { + TSieve::Sieve(PCur, PCur); + PCur = nullptr; + } + return nullptr; + } + + PCur = &Cur; + memcpy(PCur, PNext, SizeOf((const TRec*)PNext)); + + do { + PNext = HeapIter.Next(); + } while (PNext && TSieve::Sieve(PCur, PNext)); + + return PCur; + } + + const TRec* Current() { + return (TIsSieveFake<TSieve>::Result ? 
HeapIter.Current() : PCur); + } + + void Close() { + Inputs.clear(); + HeapIter.Term(); + } + +private: + TInputVector Inputs; + THeapIter<TRec, TInput, TComp> HeapIter; + TRec Cur; + TRec* PCur = nullptr; + const TRec* PNext = nullptr; +}; + +class TPortionManager { +public: + void Open(const char* tempDir) { + TGuard<TMutex> guard(Mutex); + TempDir = tempDir; + } + + TString Next() { + TGuard<TMutex> guard(Mutex); + if (Portions == 0) + DoOpen(); + TString fname = GeneratePortionFilename(Portions++); + return fname; + } + + void Close() { + TGuard<TMutex> guard(Mutex); + Portions = 0; + } + +private: + void DoOpen() { + if (MakeSorterTempl(PortionFilenameTempl, TempDir.data())) { + PortionFilenameTempl[0] = 0; + ythrow yexception() << "portion-manager: bad tempdir \"" << TempDir.data() << "\": " << LastSystemErrorText(); + } + } + + TString GeneratePortionFilename(int i) { + char str[FILENAME_MAX]; + snprintf(str, sizeof(str), PortionFilenameTempl, i); + return TString(str); + } + +private: + TMutex Mutex; + + TString TempDir; + char PortionFilenameTempl[FILENAME_MAX] = {}; + int Portions = 0; +}; + +// A merger powered by threads +template < + class TRecord, + template <typename T> class TCompare, + class TSieve, + class TInput = TInDatFile<TRecord>, + class TOutput = TOutDatFile<TRecord>> +class TPowerMerger { +public: + typedef TRecord TRec; + typedef TDatMerger<TRecord, TInput, TCompare, TSieve> TMerger; + typedef TSimpleSharedPtr<TMerger> TMergerPtr; + typedef TPowerMerger<TRecord, TCompare, TSieve, TInput, TOutput> TFileMerger; + + struct TMergePortionTask: public IObjectInQueue { + TFileMerger* FileMerger; + int Begin; + int End; + TString OutFname; + + TMergePortionTask(TFileMerger* fileMerger, int begin, int end, const TString& outFname) + : FileMerger(fileMerger) + , Begin(begin) + , End(end) + , OutFname(outFname) + { + } + + void Process(void*) override { + THolder<TMergePortionTask> This(this); + //fprintf(stderr, "MergePortion: (%i, %i, %s)\n", 
Begin, End, ~OutFname); + FileMerger->MergePortion(Begin, End, OutFname); + } + }; + +public: + TPowerMerger(const TSimpleSharedPtr<TThreadPool>& mtpQueue, const TSimpleSharedPtr<TPortionManager>& portMan, + int memory, int pageSize, bool autoUnlink) + : MtpQueue(mtpQueue) + , PortionManager(portMan) + , Memory(memory) + , PageSize(pageSize) + , AutoUnlink(autoUnlink) + { + } + + TPowerMerger(const TSimpleSharedPtr<TThreadPool>& mtpQueue, const char* tempDir, + int memory, int pageSize, bool autoUnlink) + : MtpQueue(mtpQueue) + , PortionManager(new TPortionManager) + , Memory(memory) + , PageSize(pageSize) + , AutoUnlink(autoUnlink) + { + PortionManager->Open(tempDir); + } + + ~TPowerMerger() { + Close(); + } + + void SetMtpQueue(const TSimpleSharedPtr<TThreadPool>& mtpQueue) { + MtpQueue = mtpQueue; + } + + void MergePortion(int begin, int end, const TString& outFname) { + TMerger merger; + InitMerger(merger, begin, end); + + TOutput out("mergeportion-tmpout", PageSize, BufSize, 0); + out.Open(outFname.data()); + const TRec* rec; + while ((rec = merger.Next())) + out.Push(rec); + out.Close(); + + merger.Close(); + + { + TGuard<TMutex> guard(Mutex); + UnlinkFiles(begin, end); + Files.push_back(outFname); + --Tasks; + TaskFinishedCond.Signal(); + } + } + + void Add(const TString& fname) { + TGuard<TMutex> guard(Mutex); + // fprintf(stderr, "TPowerMerger::Add: %s\n", ~fname); + Files.push_back(fname); + if (InitialFilesEnd > 0) + ythrow yexception() << "TPowerMerger::Add: no more files allowed"; + } + + void Merge(int maxPortions) { + TGuard<TMutex> guard(Mutex); + InitialFilesEnd = Files.ysize(); + if (!InitialFilesEnd) + ythrow yexception() << "TPowerMerger::Merge: no files added"; + Optimize(maxPortions); + MergeMT(); + InitMerger(Merger, CPortions, Files.ysize()); + } + + void Close() { + TGuard<TMutex> guard(Mutex); + Merger.Close(); + UnlinkFiles(CPortions, Files.ysize()); + InitialFilesEnd = CPortions = 0; + Files.clear(); + } + + const TRec* Next() { + return 
Merger.Next(); + } + + const TRec* Current() { + return Merger.Current(); + } + + int FileCount() const { + TGuard<TMutex> guard(Mutex); + return Files.ysize(); + } + +private: + void InitMerger(TMerger& merger, int begin, int end) { + TGuard<TMutex> guard(Mutex); + TVector<TSimpleSharedPtr<TInput>> inputs; + for (int i = begin; i < end; ++i) { + inputs.push_back(new TInput("mergeportion-tmpin", BufSize, 0)); + inputs.back()->Open(Files[i]); + // fprintf(stderr, "InitMerger: %i, %s\n", i, ~Files[i]); + } + merger.Init(inputs); + } + + void UnlinkFiles(int begin, int end) { + TGuard<TMutex> guard(Mutex); + for (int i = begin; i < end; ++i) { + if (i >= InitialFilesEnd || AutoUnlink) + unlink(Files[i].c_str()); + } + } + + void Optimize(int maxPortions, size_t maxBufSize = 4u << 20) { + TGuard<TMutex> guard(Mutex); + maxPortions = std::min(maxPortions, Memory / PageSize - 1); + maxBufSize = std::max((size_t)PageSize, maxBufSize); + + if (maxPortions <= 2) { + FPortions = MPortions = 2; + BufSize = PageSize; + return; + } + + int Portions = Files.ysize(); + if (maxPortions >= Portions) { + FPortions = MPortions = Portions; + } else if (((Portions + maxPortions - 1) / maxPortions) <= maxPortions) { + while (((Portions + maxPortions - 1) / maxPortions) <= maxPortions) + --maxPortions; + MPortions = ++maxPortions; + int total = ((Portions + MPortions - 1) / MPortions) + Portions; + FPortions = (total % MPortions) ? (total % MPortions) : (int)MPortions; + } else + FPortions = MPortions = maxPortions; + + BufSize = std::min((size_t)(Memory / (MPortions + 1)), maxBufSize); + // fprintf(stderr, "Optimize: Portions=%i; MPortions=%i; FPortions=%i; Memory=%i; BufSize=%i\n", + // (int)Portions, (int)MPortions, (int)FPortions, (int)Memory, (int)BufSize); + } + + void MergeMT() { + TGuard<TMutex> guard(Mutex); + do { + int n; + while ((n = Files.ysize() - CPortions) > MPortions) { + int m = std::min((CPortions == 0 ? 
(int)FPortions : (int)MPortions), n); + TString fname = PortionManager->Next(); + if (!MtpQueue->Add(new TMergePortionTask(this, CPortions, CPortions + m, fname))) + ythrow yexception() << "TPowerMerger::MergeMT: failed to add task"; + CPortions += m; + ++Tasks; + } + if (Tasks > 0) + TaskFinishedCond.Wait(Mutex); + } while (Tasks > 0); + } + +private: + TMutex Mutex; + TCondVar TaskFinishedCond; + + TMerger Merger; + TSimpleSharedPtr<TThreadPool> MtpQueue; + TSimpleSharedPtr<TPortionManager> PortionManager; + TVector<TString> Files; + int Tasks = 0; + int InitialFilesEnd = 0; + int CPortions = 0; + int MPortions = 0; + int FPortions = 0; + int Memory = 0; + int PageSize = 0; + int BufSize = 0; + bool AutoUnlink = false; +}; + +// A sorter powered by threads +template < + class TRecord, + template <typename T> class TCompare, + class TSieve = TFakeSieve<TRecord>, + class TTmpInput = TInDatFile<TRecord>, + class TTmpOutput = TOutDatFile<TRecord>> +class TPowerSorter { +public: + typedef TPowerSorter<TRecord, TCompare, TSieve, TTmpInput, TTmpOutput> TSorter; + typedef TRecord TRec; + typedef TTmpOutput TTmpOut; + typedef TTmpInput TTmpIn; + typedef TDatSorterBuf<TRecord, TCompare, TSieve> TSorterBuf; + typedef TCompare<TRecord> TComp; + typedef TPowerMerger<TRecord, TCompare, TSieve, TTmpInput, TTmpOutput> TFileMerger; + + struct TSortPortionTask: public IObjectInQueue { + TSorter* Sorter; + TSorterBuf* SorterBuf; + int Portion; + + TSortPortionTask(TSorter* sorter, TSorterBuf* sorterBuf, int portion) + : Sorter(sorter) + , SorterBuf(sorterBuf) + , Portion(portion) + { + } + + void Process(void*) override { + TAutoPtr<TSortPortionTask> This(this); + // fprintf(stderr, "SortPortion: %i\n", Portion); + Sorter->SortPortion(SorterBuf); + } + }; + + class TSorterBufQueue { + private: + TMutex Mutex; + TCondVar Cond; + TVector<TSimpleSharedPtr<TSorterBuf>> V; + TDeque<TSorterBuf*> Q; + + int Memory, PageSize, MaxSorterBufs; + + public: + TSorterBufQueue(int memory, int 
pageSize, int maxSorterBufs) + : Memory(memory) + , PageSize(pageSize) + , MaxSorterBufs(maxSorterBufs) + { + } + + void Push(TSorterBuf* sb) { + TGuard<TMutex> guard(Mutex); + sb->Clear(); + Q.push_back(sb); + Cond.Signal(); + } + + TSorterBuf* Pop() { + TGuard<TMutex> guard(Mutex); + if (!Q.size() && V.ysize() < MaxSorterBufs) { + V.push_back(new TSorterBuf(Memory / MaxSorterBufs, PageSize)); + return V.back().Get(); + } else { + while (!Q.size()) + Cond.Wait(Mutex); + TSorterBuf* t = Q.front(); + Q.pop_front(); + return t; + } + } + + void Clear() { + TGuard<TMutex> guard(Mutex); + Q.clear(); + V.clear(); + } + + void WaitAll() { + TGuard<TMutex> guard(Mutex); + while (Q.size() < V.size()) { + Cond.Wait(Mutex); + } + } + + int GetMaxSorterBufs() const { + return MaxSorterBufs; + } + }; + +public: + TPowerSorter(const TSimpleSharedPtr<TThreadPool>& mtpQueue, size_t maxSorterBufs, + const char* name, size_t memory, size_t pageSize, size_t bufSize) + : MaxSorterBufs(maxSorterBufs) + , Name(name) + , Memory(memory) + , PageSize(pageSize) + , BufSize(bufSize) + , MtpQueue(mtpQueue) + , PortionManager(new TPortionManager) + , SBQueue(Memory, PageSize, MaxSorterBufs) + , FileMerger(MtpQueue, PortionManager, Memory, PageSize, true) + { + } + + TPowerSorter(size_t maxSorterBufs, + const char* name, size_t memory, size_t pageSize, size_t bufSize) + : MaxSorterBufs(maxSorterBufs) + , Name(name) + , Memory(memory) + , PageSize(pageSize) + , BufSize(bufSize) + , PortionManager(new TPortionManager) + , SBQueue(Memory, PageSize, maxSorterBufs) + , FileMerger(MtpQueue, PortionManager, Memory, PageSize, true) + { + } + + TPowerSorter(const char* name, size_t memory, size_t pageSize, size_t bufSize) + : MaxSorterBufs(5) + , Name(name) + , Memory(memory) + , PageSize(pageSize) + , BufSize(bufSize) + , PortionManager(new TPortionManager) + , SBQueue(Memory, PageSize, MaxSorterBufs) + , FileMerger(MtpQueue, PortionManager, Memory, PageSize, true) + { + } + + ~TPowerSorter() { + 
Close(); + } + + void Open(const char* tempDir) { + Close(); + CurSB = SBQueue.Pop(); + PortionManager->Open(tempDir); + } + + void Reopen(const char* fname) { + Open(fname); + } + + void Close() { + CurSB = nullptr; + SBQueue.Clear(); + PortionCount = 0; + FileMerger.Close(); + PortionManager->Close(); + } + + const TRec* Push(const TRec* v) { + CheckOpen("Push"); + const TRec* u = CurSB->Push(v); + if (!u) { + NextPortion(); + u = CurSB->Push(v); + } + return u; + } + + void Sort(int maxPortions = 1000) { + CheckOpen("Sort"); + if (!PortionCount) { + CurSB->Sort(); + } else { + NextPortion(); + SBQueue.Push(CurSB); + CurSB = nullptr; + SBQueue.WaitAll(); + SBQueue.Clear(); + FileMerger.Merge(maxPortions); + } + } + + const TRec* Next() { + return PortionCount ? FileMerger.Next() : CurSB->Next(); + } + + const TRec* Current() { + return PortionCount ? FileMerger.Current() : CurSB->Current(); + } + + int GetBufSize() const { + return BufSize; + } + + int GetPageSize() const { + return PageSize; + } + + const char* GetName() const { + return Name.data(); + } + +private: + void CheckOpen(const char* m) { + if (!CurSB) + ythrow yexception() << "TPowerSorter::" << m << ": the sorter is not open"; + } + + void NextPortion() { + if (!CurSB->Size()) + return; + ++PortionCount; + if (MaxSorterBufs <= 1) { + SortPortion(CurSB); + } else { + if (!MtpQueue.Get()) { + MtpQueue.Reset(new TThreadPool); + MtpQueue->Start(MaxSorterBufs - 1); + FileMerger.SetMtpQueue(MtpQueue); + } + if (!MtpQueue->Add(new TSortPortionTask(this, CurSB, PortionCount))) + ythrow yexception() << "TPowerSorter::NextPortion: failed to add task"; + } + CurSB = SBQueue.Pop(); + } + + void SortPortion(TSorterBuf* sorterBuf) { + TString portionFilename = PortionManager->Next(); + try { + sorterBuf->Sort(); + + // fprintf(stderr, "TPowerSorter::SortPortion: -> %s\n", ~portionFilename); + TTmpOut out("powersorter-portion", PageSize, BufSize, 0); + out.Open(portionFilename.data()); + + while 
(sorterBuf->Next()) + out.Push(sorterBuf->Current()); + + out.Close(); + FileMerger.Add(portionFilename); + SBQueue.Push(sorterBuf); + } catch (const yexception& e) { + unlink(portionFilename.data()); + ythrow yexception() << "SortPortion: " << e.what(); + } + } + +private: + int MaxSorterBufs = 0; + TString Name; + int Memory = 0; + int PageSize = 0; + int BufSize = 0; + + TMutex Mutex; + TSimpleSharedPtr<TThreadPool> MtpQueue; + TSimpleSharedPtr<TPortionManager> PortionManager; + + TSorterBufQueue SBQueue; + TSorterBuf* CurSB = nullptr; + int PortionCount = 0; + + TFileMerger FileMerger; +}; diff --git a/library/cpp/microbdb/reader.h b/library/cpp/microbdb/reader.h new file mode 100644 index 0000000000..694a2f1766 --- /dev/null +++ b/library/cpp/microbdb/reader.h @@ -0,0 +1,354 @@ +#pragma once + +#include "align.h" +#include "header.h" +#include "extinfo.h" + +#include <contrib/libs/zlib/zlib.h> +#include <contrib/libs/fastlz/fastlz.h> +#include <contrib/libs/snappy/snappy.h> + +#include <util/generic/vector.h> +#include <util/memory/tempbuf.h> + +namespace NMicroBDB { + static const size_t DEFAULT_BUFFER_SIZE = (64 << 10); + + //! + template <class TVal> + class IBasePageReader { + public: + virtual size_t GetRecSize() const = 0; + virtual size_t GetExtSize() const = 0; + virtual bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const = 0; + virtual const ui8* GetExtInfoRaw(size_t* len) const = 0; + virtual const TVal* Next() = 0; + virtual void Reset() = 0; + //! set clearing flag, so temporary buffers will be cleared + //! 
in next call of Next() + virtual void SetClearFlag() { + } + + virtual ~IBasePageReader() { + } + }; + + template <class TVal, typename TPageIter> + class TRawPageReader: public IBasePageReader<TVal> { + public: + TRawPageReader(TPageIter* const iter) + : PageIter(iter) + { + Reset(); + } + + bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const override { + Y_VERIFY(TExtInfoType<TVal>::Exists, "GetExtInfo should only be used with extended records"); + if (!Rec) + return false; + ui8* raw = (ui8*)Rec + RecSize + ExtLenSize; + return extInfo->ParseFromArray(raw, ExtSize); + } + + size_t GetRecSize() const override { + return RecSize + ExtLenSize; + } + + size_t GetExtSize() const override { + return ExtSize; + } + + const ui8* GetExtInfoRaw(size_t* len) const override { + Y_VERIFY(TExtInfoType<TVal>::Exists, "GetExtInfo should only be used with extended records"); + if (!Rec) { + *len = 0; + return nullptr; + } + *len = ExtLenSize + ExtSize; + return (ui8*)Rec + RecSize; + } + + const TVal* Next() override { + if (!Rec) + Rec = (TVal*)((char*)PageIter->Current() + sizeof(TDatPage)); + else + Rec = (TVal*)((char*)Rec + DatCeil(RecSize + ExtLenSize + ExtSize)); + if (!TExtInfoType<TVal>::Exists) + RecSize = SizeOf(Rec); + else + RecSize = SizeOfExt(Rec, &ExtLenSize, &ExtSize); + return Rec; + } + + void Reset() override { + Rec = nullptr; + RecSize = 0; + ExtLenSize = 0; + ExtSize = 0; + } + + private: + const TVal* Rec; + size_t RecSize; + size_t ExtLenSize; + size_t ExtSize; + TPageIter* const PageIter; + }; + + template <class TVal, typename TPageIter> + class TCompressedReader: public IBasePageReader<TVal> { + inline size_t GetFirstRecordSize(const TVal* const in) const { + if (!TExtInfoType<TVal>::Exists) { + return DatCeil(SizeOf(in)); + } else { + size_t ll; + size_t l; + size_t ret = SizeOfExt(in, &ll, &l); + + return DatCeil(ret + ll + l); + } + } + + void DecompressBlock() { + if (PageIter->IsFrozen() && Buffer.Get()) + 
Blocks.push_back(Buffer.Release()); + + const TCompressedHeader* hdr = (const TCompressedHeader*)(Page); + + Page += sizeof(TCompressedHeader); + + const size_t first = GetFirstRecordSize((const TVal*)Page); + + if (!Buffer.Get() || Buffer->Size() < hdr->Original) + Buffer.Reset(new TTempBuf(Max<size_t>(hdr->Original, DEFAULT_BUFFER_SIZE))); + + memcpy(Buffer->Data(), Page, first); + Page += first; + + if (hdr->Count > 1) { + switch (Algo) { + case MBDB_COMPRESSION_ZLIB: { + uLongf dst = hdr->Original - first; + + int ret = uncompress((Bytef*)Buffer->Data() + first, &dst, Page, hdr->Compressed); + + if (ret != Z_OK) + ythrow yexception() << "error then uncompress " << ret; + } break; + case MBDB_COMPRESSION_FASTLZ: { + int dst = hdr->Original - first; + int ret = yfastlz_decompress(Page, hdr->Compressed, Buffer->Data() + first, dst); + + if (!ret) + ythrow yexception() << "error then uncompress"; + } break; + case MBDB_COMPRESSION_SNAPPY: { + if (!snappy::RawUncompress((const char*)Page, hdr->Compressed, Buffer->Data() + first)) + ythrow yexception() << "error then uncompress"; + } break; + } + } + + Rec = nullptr; + RecNum = hdr->Count; + Page += hdr->Compressed; + } + + void ClearBuffer() { + for (size_t i = 0; i < Blocks.size(); ++i) + delete Blocks[i]; + Blocks.clear(); + ClearFlag = false; + } + + public: + TCompressedReader(TPageIter* const iter) + : Rec(nullptr) + , RecSize(0) + , ExtLenSize(0) + , ExtSize(0) + , Page(nullptr) + , PageIter(iter) + , RecNum(0) + , BlockNum(0) + , ClearFlag(false) + { + } + + ~TCompressedReader() override { + ClearBuffer(); + } + + size_t GetRecSize() const override { + return RecSize + ExtLenSize; + } + + size_t GetExtSize() const override { + return ExtSize; + } + + bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const override { + Y_VERIFY(TExtInfoType<TVal>::Exists, "GetExtInfo should only be used with extended records"); + if (!Rec) + return false; + ui8* raw = (ui8*)Rec + RecSize + ExtLenSize; + return 
extInfo->ParseFromArray(raw, ExtSize); + } + + const ui8* GetExtInfoRaw(size_t* len) const override { + Y_VERIFY(TExtInfoType<TVal>::Exists, "GetExtInfo should only be used with extended records"); + if (!Rec) { + *len = 0; + return nullptr; + } + *len = ExtLenSize + ExtSize; + return (ui8*)Rec + RecSize; + } + + const TVal* Next() override { + Y_ASSERT(RecNum >= 0); + + if (ClearFlag) + ClearBuffer(); + + if (!Page) { + if (!PageIter->Current()) + return nullptr; + + Page = (ui8*)PageIter->Current() + sizeof(TDatPage); + + BlockNum = ((TCompressedPage*)Page)->BlockCount - 1; + Algo = (ECompressionAlgorithm)((TCompressedPage*)Page)->Algorithm; + Page += sizeof(TCompressedPage); + + DecompressBlock(); + } + + if (!RecNum) { + if (BlockNum <= 0) + return nullptr; + else { + --BlockNum; + DecompressBlock(); + } + } + + --RecNum; + if (!Rec) + Rec = (const TVal*)Buffer->Data(); + else + Rec = (const TVal*)((char*)Rec + DatCeil(RecSize + ExtLenSize + ExtSize)); + + if (!TExtInfoType<TVal>::Exists) + RecSize = SizeOf(Rec); + else + RecSize = SizeOfExt(Rec, &ExtLenSize, &ExtSize); + + return Rec; + } + + void Reset() override { + Page = nullptr; + BlockNum = 0; + Rec = nullptr; + RecSize = 0; + ExtLenSize = 0; + ExtSize = 0; + RecNum = 0; + } + + void SetClearFlag() override { + ClearFlag = true; + } + + public: + THolder<TTempBuf> Buffer; + TVector<TTempBuf*> Blocks; + const TVal* Rec; + size_t RecSize; + size_t ExtLenSize; + size_t ExtSize; + const ui8* Page; + TPageIter* const PageIter; + int RecNum; //!< count of recs in current block + int BlockNum; + ECompressionAlgorithm Algo; + bool ClearFlag; + }; + + class TZLibCompressionImpl { + public: + static const ECompressionAlgorithm Code = MBDB_COMPRESSION_ZLIB; + + inline void Init() { + // - + } + + inline void Term() { + // - + } + + inline size_t CompressBound(size_t size) const noexcept { + return ::compressBound(size); + } + + inline void Compress(void* out, size_t& outSize, const void* in, size_t inSize) { + 
uLongf size = outSize; + + if (compress((Bytef*)out, &size, (const Bytef*)in, inSize) != Z_OK) + ythrow yexception() << "not compressed"; + outSize = size; + } + }; + + class TFastlzCompressionImpl { + public: + static const ECompressionAlgorithm Code = MBDB_COMPRESSION_FASTLZ; + + inline void Init() { + // - + } + + inline void Term() { + // - + } + + inline size_t CompressBound(size_t size) const noexcept { + size_t rval = size_t(size * 1.07); + return rval < 66 ? 66 : rval; + } + + inline void Compress(void* out, size_t& outSize, const void* in, size_t inSize) { + outSize = yfastlz_compress_level(2, in, inSize, out); + if (!outSize) + ythrow yexception() << "not compressed"; + } + }; + + class TSnappyCompressionImpl { + public: + static const ECompressionAlgorithm Code = MBDB_COMPRESSION_SNAPPY; + + inline void Init() { + // - + } + + inline void Term() { + // - + } + + inline size_t CompressBound(size_t size) const noexcept { + return snappy::MaxCompressedLength(size); + } + + inline void Compress(void* out, size_t& outSize, const void* in, size_t inSize) { + snappy::RawCompress((const char*)in, inSize, (char*)out, &outSize); + } + }; + +} + +using TFakeCompression = void; +using TZLibCompression = NMicroBDB::TZLibCompressionImpl; +using TFastlzCompression = NMicroBDB::TFastlzCompressionImpl; +using TSnappyCompression = NMicroBDB::TSnappyCompressionImpl; diff --git a/library/cpp/microbdb/safeopen.h b/library/cpp/microbdb/safeopen.h new file mode 100644 index 0000000000..c328ffd575 --- /dev/null +++ b/library/cpp/microbdb/safeopen.h @@ -0,0 +1,792 @@ +#pragma once + +// util +#include <util/generic/yexception.h> +#include <util/generic/vector.h> +#include <util/string/util.h> +#include <util/system/mutex.h> +#include <thread> + +#include "microbdb.h" + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4706) /*assignment within conditional expression*/ +#pragma warning(disable : 4267) /*conversion from 'size_t' to 'type', possible loss of 
data*/ +#endif + +template <typename TVal, typename TPageFile = TInputPageFile, typename TIterator = TInputPageIterator<TPageFile>> +class TInDatFile: protected TInDatFileImpl<TVal, TInputRecordIterator<TVal, TIterator>> { +public: + typedef TVal TRec; + typedef TInDatFileImpl<TVal, TInputRecordIterator<TVal, TIterator>> TBase; + + TInDatFile(const TString& name, size_t pages, int pagesOrBytes = 1) + : Name(name) + , Pages(pages) + , PagesOrBytes(pagesOrBytes) + { + } + + ~TInDatFile() { + Close(); + } + + void Open(const TString& fname, bool direct = false) { + ui32 gotRecordSig = 0; + int ret = TBase::Open(fname.data(), Pages, PagesOrBytes, &gotRecordSig, direct); + if (ret) { + // XXX: print record type name, not type sig + ythrow yexception() << ErrorMessage(ret, "Failed to open input file", fname, TVal::RecordSig, gotRecordSig); + } + Name = fname; + } + + void OpenStream(TAutoPtr<IInputStream> input) { + ui32 gotRecordSig = 0; + int ret = TBase::Open(input, Pages, PagesOrBytes, &gotRecordSig); + if (ret) { + // XXX: print record type name, not type sig + ythrow yexception() << ErrorMessage(ret, "Failed to open input file", Name, TVal::RecordSig, gotRecordSig); + } + } + + void Close() { + int ret; + if (IsOpen() && (ret = TBase::GetError())) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret, "Error before closing input file", Name); + if ((ret = TBase::Close())) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret, "Error while closing input file", Name); + } + + const char* GetName() const { + return Name.data(); + } + + using TBase::Current; + using TBase::Freeze; + using TBase::GetError; + using TBase::GetExtInfo; + using TBase::GetExtInfoRaw; + using TBase::GetExtSize; + using TBase::GetLastPage; + using TBase::GetPageNum; + using TBase::GetPageSize; + using TBase::GetRecSize; + using TBase::GotoLastPage; + using TBase::GotoPage; + using TBase::IsEof; + using TBase::IsOpen; + using TBase::Next; + using 
TBase::Skip; + using TBase::Unfreeze; + +protected: + TString Name; + size_t Pages; + int PagesOrBytes; +}; + +template <typename TVal> +class TMappedInDatFile: protected TInDatFileImpl<TVal, TInputRecordIterator<TVal, TMappedInputPageIterator<TMappedInputPageFile>>> { +public: + typedef TVal TRec; + typedef TInDatFileImpl<TVal, TInputRecordIterator<TVal, TMappedInputPageIterator<TMappedInputPageFile>>> TBase; + + TMappedInDatFile(const TString& name, size_t /* pages */, int /* pagesOrBytes */) + : Name(name) + { + } + + ~TMappedInDatFile() { + Close(); + } + + void Open(const TString& fname) { + int ret = TBase::Open(fname.data()); + if (ret) + ythrow yexception() << ErrorMessage(ret, "Failed to open mapped file", fname, TVal::RecordSig); + Name = fname; + } + + void Close() { + int ret; + if (IsOpen() && (ret = TBase::GetError())) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret, "Error before closing mapped file", Name); + if ((ret = TBase::Close())) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret, "Error while closing mapped file", Name); + } + + const char* GetName() const { + return Name.data(); + } + + using TBase::Current; + using TBase::GetError; + using TBase::GetExtInfo; + using TBase::GetExtInfoRaw; + using TBase::GetLastPage; + using TBase::GetPageNum; + using TBase::GetPageSize; + using TBase::GotoLastPage; + using TBase::GotoPage; + using TBase::IsEof; + using TBase::IsOpen; + using TBase::Next; + using TBase::Skip; + +protected: + TString Name; +}; + +template <typename TVal, typename TCompressor = TFakeCompression, typename TPageFile = TOutputPageFile> +class TOutDatFile: protected TOutDatFileImpl<TVal, TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TFakeIndexer, TCompressor>> { +public: + typedef TOutDatFileImpl<TVal, TOutputRecordIterator<TVal, TOutputPageIterator<TPageFile>, TFakeIndexer, TCompressor>> TBase; + + TOutDatFile(const TString& name, size_t pagesize, size_t pages, 
int pagesOrBytes = 1) + : Name(name) + , PageSize(pagesize) + , Pages(pages) + , PagesOrBytes(pagesOrBytes) + { + } + + ~TOutDatFile() { + Close(); + } + + void Open(const char* fname, bool direct = false) { + int ret = TBase::Open(fname, PageSize, Pages, PagesOrBytes, direct); + if (ret) + ythrow yexception() << ErrorMessage(ret, "Failed to open output file", fname); + Name = fname; + } + + void Open(const TString& fname) { + Open(fname.data()); + } + + void OpenStream(TAutoPtr<IOutputStream> output) { + int ret = TBase::Open(output, PageSize, Pages, PagesOrBytes); + if (ret) + ythrow yexception() << ErrorMessage(ret, "Failed to open output stream", Name); + } + + void Close() { + int ret; + if ((ret = TBase::GetError())) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret, "Error before closing output file", Name); + if ((ret = TBase::Close())) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret, "Error while closing output file", Name); + } + + const char* GetName() const { + return Name.data(); + } + + using TBase::Freeze; + using TBase::GetError; + using TBase::GetPageSize; + using TBase::IsEof; + using TBase::IsOpen; + using TBase::Offset; + using TBase::Push; + using TBase::PushWithExtInfo; + using TBase::Reserve; + using TBase::Unfreeze; + +protected: + TString Name; + size_t PageSize, Pages; + int PagesOrBytes; +}; + +template <typename TVal, typename TCompressor, typename TPageFile> +class TOutDatFileArray; + +template <typename TVal, typename TCompressor = TFakeCompression, typename TPageFile = TOutputPageFile> +class TOutDatFileArray { + typedef TOutDatFile<TVal, TCompressor, TPageFile> TFileType; + +public: + TOutDatFileArray(const TString& name, size_t pagesize, size_t pages, int pagesOrBytes = 1) + : Name(name) + , PageSize(pagesize) + , Pages(pages) + , PagesOrBytes(pagesOrBytes) + , NumFiles(0) + , Files(nullptr) + { + } + + ~TOutDatFileArray() { + for (int i = 0; i < NumFiles; ++i) { + 
Files[i].Close(); + Files[i].~TFileType(); + } + free(Files); + Files = nullptr; + NumFiles = 0; + } + + TFileType& operator[](size_t pos) { + return Files[pos]; + } + + void Open(int n, const TString& fname) { + char temp[FILENAME_MAX]; + + Name = fname; + NumFiles = CreateDatObjects(n, fname); + + int i; + try { + for (i = 0; i < NumFiles; ++i) { + sprintf(temp, fname.data(), i); + Files[i].Open(temp); + } + } catch (...) { + while (--i >= 0) + Files[i].Close(); + throw; + } + } + + template <typename TNameBuilder> + void OpenWithCallback(int n, const TNameBuilder& builder) { + NumFiles = CreateDatObjects(n, Name); + + for (int i = 0; i < NumFiles; ++i) + Files[i].Open(builder.GetName(i).data()); + } + + void Close() { + for (int i = 0; i < NumFiles; ++i) + Files[i].Close(); + } + + void CloseMT(ui32 threads) { + int current = 0; + TMutex mutex; + TVector<std::thread> thrs; + thrs.reserve(threads); + for (ui32 i = 0; i < threads; i++) { + thrs.emplace_back([this, ¤t, &mutex]() { + while (true) { + mutex.Acquire(); + int cur = current++; + mutex.Release(); + if (cur >= NumFiles) + break; + Files[cur].Close(); + } + }); + } + for (auto& thread : thrs) { + thread.join(); + } + } + + const char* GetName() const { + return Name.data(); + } + +protected: + int CreateDatObjects(int n, const TString& fname) { + if (!(Files = (TFileType*)malloc(n * sizeof(TFileType)))) + ythrow yexception() << "can't alloc \"" << fname << "\" file array: " << LastSystemErrorText(); + int num = 0; + char temp[FILENAME_MAX]; + for (int i = 0; i < n; ++i, ++num) { + sprintf(temp, "%s[%d]", fname.data(), i); + new (Files + i) TFileType(temp, PageSize, Pages, PagesOrBytes); + } + return num; + } + + TString Name; + size_t PageSize, Pages; + int PagesOrBytes, NumFiles; + TFileType* Files; +}; + +template <typename TVal, typename TKey, typename TCompressor = TFakeCompression, typename TPageFile = TOutputPageFile> +class TOutDirectFile: protected TOutDirectFileImpl<TVal, TKey, TCompressor, 
TPageFile> { + typedef TOutDirectFileImpl<TVal, TKey, TCompressor, TPageFile> TBase; + +public: + TOutDirectFile(const TString& name, size_t pagesize, size_t pages, size_t ipagesize, size_t ipages, int pagesOrBytes) + : Name(name) + , PageSize(pagesize) + , Pages(pages) + , IdxPageSize(ipagesize) + , IdxPages(ipages) + , PagesOrBytes(pagesOrBytes) + { + } + + ~TOutDirectFile() { + Close(); + } + + void Open(const TString& fname) { + int ret = TBase::Open(fname.data(), PageSize, Pages, IdxPageSize, IdxPages, PagesOrBytes); + if (ret) + ythrow yexception() << ErrorMessage(ret, "Failed to open output file", fname); + Name = fname; + } + + void Close() { + int ret; + if ((ret = TBase::GetError())) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret, "Error before closing output file", Name); + if ((ret = TBase::Close())) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret, "Error while closing output file", Name); + } + + const char* GetName() const { + return Name.data(); + } + + using TBase::Freeze; + using TBase::Push; + using TBase::PushWithExtInfo; + using TBase::Reserve; + using TBase::Unfreeze; + +protected: + TString Name; + size_t PageSize, Pages, IdxPageSize, IdxPages; + int PagesOrBytes; +}; + +template < + typename TVal, + template <typename T> class TComparer, + typename TCompress = TFakeCompression, + typename TSieve = TFakeSieve<TVal>, + typename TPageFile = TOutputPageFile, + typename TFileTypes = TDefInterFileTypes> +class TDatSorter: protected TDatSorterImpl<TVal, TComparer<TVal>, TCompress, TSieve, TPageFile, TFileTypes> { + typedef TDatSorterImpl<TVal, TComparer<TVal>, TCompress, TSieve, TPageFile, TFileTypes> TBase; + +public: + typedef TVal TRec; + +public: + TDatSorter(const TString& name, size_t memory, size_t pagesize, size_t pages, int pagesOrBytes = 1) + : Name(name) + , Memory(memory) + , PageSize(pagesize) + , Pages(pages) + , PagesOrBytes(pagesOrBytes) + { + Templ[0] = 0; + } + + 
~TDatSorter() { + Close(); + Templ[0] = 0; + } + + void Open(const TString& dirName) { + int ret; + if (ret = MakeSorterTempl(Templ, dirName.data())) { + Templ[0] = 0; + ythrow yexception() << ErrorMessage(ret, Name + " sorter: bad tempdir", dirName); + } + if ((ret = TBase::Open(Templ, PageSize, Pages, PagesOrBytes))) + ythrow yexception() << ErrorMessage(ret, Name + " sorter: open error, temp dir", Templ); + } + + void Sort(bool direct = false) { + int ret = TBase::Sort(Memory, 1000, direct); + if (ret) + ythrow yexception() << ErrorMessage(ret, Name + " sorter: sort error, temp dir", Templ, TVal::RecordSig); + } + + void SortToFile(const TString& name) { + int ret = TBase::SortToFile(name.data(), Memory); + if (ret) + ythrow yexception() << ErrorMessage(ret, Name + "sorter: error in SortToFile", name, TVal::RecordSig); + } + + void SortToStream(TAutoPtr<IOutputStream> output) { + int ret = TBase::SortToStream(output, Memory); + if (ret) + ythrow yexception() << ErrorMessage(ret, Name + "sorter: error in SortToStream", "", TVal::RecordSig); + } + + void Close() { + int ret1 = TBase::GetError(); + int ret2 = TBase::Close(); + if (Templ[0]) { + *strrchr(Templ, GetDirectorySeparator()) = 0; + RemoveDirWithContents(Templ); + Templ[0] = 0; + } + if (ret1) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret1, Name + "sorter: error before closing"); + if (ret2) + if (!std::uncaught_exception()) + ythrow yexception() << ErrorMessage(ret2, Name + "sorter: error while closing"); + } + + int Sort(size_t memory, int maxportions, bool direct = false) { + return TBase::Sort(memory, maxportions, direct); + } + + const char* GetName() const { + return Name.data(); + } + + using TBase::GetPageSize; + using TBase::GetPages; + using TBase::Next; + using TBase::NextPortion; + using TBase::Push; + using TBase::PushWithExtInfo; + using TBase::UseSegmentSorter; + +protected: + TString Name; + size_t Memory, PageSize, Pages; + int PagesOrBytes; + char 
Templ[FILENAME_MAX]; +}; + +template <typename TSorter> +class TSorterArray { +public: + typedef TSorter TDatSorter; + +public: + TSorterArray(const TString& name, size_t memory, size_t pagesize, size_t pages, int pagesOrBytes = 1) + : Name(name) + , Memory(memory) + , PageSize(pagesize) + , Pages(pages) + , PagesOrBytes(pagesOrBytes) + , NumSorters(0) + , Sorters(nullptr) + { + } + + ~TSorterArray() { + for (int i = 0; i < NumSorters; ++i) { + Sorters[i].Close(); + Sorters[i].~TSorter(); + } + free(Sorters); + Sorters = nullptr; + NumSorters = 0; + } + + TSorter& operator[](size_t pos) { + return Sorters[pos]; + } + + void Open(int n, const TString& fname, size_t memory = 0) { + if (!(Sorters = (TSorter*)malloc(n * sizeof(TSorter)))) + ythrow yexception() << "can't alloc \"" << fname << "\" sorter array: " << LastSystemErrorText(); + NumSorters = n; + char temp[FILENAME_MAX]; + if (memory) + Memory = memory; + for (int i = 0; i < NumSorters; ++i) { + sprintf(temp, "%s[%d]", Name.data(), i); + new (Sorters + i) TSorter(temp, Memory, PageSize, Pages, PagesOrBytes); + } + for (int i = 0; i < NumSorters; ++i) + Sorters[i].Open(fname); + } + + void Close() { + for (int i = 0; i < NumSorters; ++i) + Sorters[i].Close(); + } + + const char* GetName() const { + return Name.data(); + } + +protected: + TString Name; + size_t Memory, PageSize, Pages; + int PagesOrBytes, NumSorters; + TSorter* Sorters; +}; + +template <typename TVal, template <typename T> class TCompare, typename TSieve = TFakeSieve<TVal>> +class TDatSorterArray: public TSorterArray<TDatSorter<TVal, TCompare, TSieve>> { +public: + TDatSorterArray(const char* name, size_t memory, size_t pagesize, size_t pages, int pagesOrBytes = 1) + : TSorterArray<TDatSorter<TVal, TCompare, TSieve>>(name, memory, pagesize, pages, pagesOrBytes) + { + } +}; + +template <typename TVal, template <typename T> class TCompare, typename TCompress = TFakeCompression, + typename TSieve = TFakeSieve<TVal>, typename TPageFile = 
TOutputPageFile, typename TFileTypes = TDefInterFileTypes> +class TDatSorterMemo: public TDatSorter<TVal, TCompare, TCompress, TSieve, TPageFile, TFileTypes> { + typedef TDatSorter<TVal, TCompare, TCompress, TSieve, TPageFile, TFileTypes> TSorter; + +public: + TOutDatFile<TVal> Memo; + TString Home; + bool OpenReq; + bool Opened; + bool UseDirectWrite; + +public: + TDatSorterMemo(const char* name, size_t memory, size_t pagesize, size_t pages, int pagesOrBytes = 1) + : TSorter(name, memory, pagesize, pages, pagesOrBytes) + , Memo(name, pagesize, memory, 0) + { + OpenReq = false; + Opened = false; + UseDirectWrite = false; + } + + void Open(const TString& home) { + OpenReq = true; + // TSorter::Open(home); + Home = home; + Memo.Open(nullptr); + Memo.Freeze(); + } + + void Reopen(const char* home) { + Close(); + Open(home); + } + + void Open() { + if (!OpenReq) { + OpenReq = true; + Memo.Open(nullptr); + Memo.Freeze(); + } + } + + void OpenIfNeeded() { + if (OpenReq && !Opened) { + if (!Home) + ythrow yexception() << "Temp directory not specified, call Open(char*) first : " << TSorter::Name; + TSorter::Open(Home); + Opened = true; + } + } + + TVal* Reserve(size_t len) { + if (TExtInfoType<TVal>::Exists) + return ReserveWithExt(len, 0); + + TVal* u = Memo.Reserve(len); + if (!u) { + OpenIfNeeded(); + TSorter::NextPortion(UseDirectWrite); + Memo.Freeze(); + u = Memo.Reserve(len); + } + TSorter::PushWithExtInfo(u); + return u; + } + + TVal* ReserveWithExt(size_t len, size_t extSize) { + size_t fullLen = len + len_long((i64)extSize) + extSize; + TVal* u = Memo.Reserve(fullLen); + if (!u) { + OpenIfNeeded(); + TSorter::NextPortion(UseDirectWrite); + Memo.Freeze(); + u = Memo.Reserve(fullLen); + if (!u) { + if (fullLen > Memo.GetPageSize()) { + ythrow yexception() << "Size of element and " << len << " size of extInfo " << extSize + << " is larger than page size " << Memo.GetPageSize(); + } + ythrow yexception() << "going to insert a null pointer. 
Bad."; + } + } + out_long((i64)extSize, (char*)u + len); + TSorter::PushWithExtInfo(u); + return u; + } + + char* GetReservedExt(TVal* rec, size_t len, size_t extSize) { + return (char*)rec + len + len_long((i64)extSize); + } + + const TVal* Push(const TVal* v, const typename TExtInfoType<TVal>::TResult* extInfo = nullptr) { + const TVal* u = Memo.Push(v, extInfo); + if (!u) { + OpenIfNeeded(); + TSorter::NextPortion(UseDirectWrite); + Memo.Freeze(); + u = Memo.Push(v, extInfo); + if (!u) { + if (SizeOf(v) > Memo.GetPageSize()) { + ythrow yexception() << "Size of element " << SizeOf(v) + << " is larger than page size " << Memo.GetPageSize(); + } + ythrow yexception() << "going to insert a null pointer. Bad."; + } + } + TSorter::PushWithExtInfo(u); + return u; + } + + const TVal* Push(const TVal* v, const ui8* extInfoRaw, size_t extLen) { + const TVal* u = Memo.Push(v, extInfoRaw, extLen); + if (!u) { + OpenIfNeeded(); + TSorter::NextPortion(UseDirectWrite); + Memo.Freeze(); + u = Memo.Push(v, extInfoRaw, extLen); + if (!u) { + if (SizeOf(v) > Memo.GetPageSize()) { + ythrow yexception() << "Size of element " << SizeOf(v) + << " is larger than page size " << Memo.GetPageSize(); + } + ythrow yexception() << "going to insert a null pointer. Bad.."; + } + } + TSorter::PushWithExtInfo(u); + return u; + } + + const TVal* PushWithExtInfo(const TVal* v) { + const TVal* u = Memo.PushWithExtInfo(v); + if (!u) { + OpenIfNeeded(); + TSorter::NextPortion(UseDirectWrite); + Memo.Freeze(); + u = Memo.PushWithExtInfo(v); + if (!u) { + if (SizeOf(v) > Memo.GetPageSize()) { + ythrow yexception() << "Size of element " << SizeOf(v) + << " is larger than page size " << Memo.GetPageSize(); + } + ythrow yexception() << "going to insert a null pointer. 
Bad..."; + } + } + TSorter::PushWithExtInfo(u); + return u; + } + + void Sort(bool direct = false) { + if (Opened) { + TSorter::NextPortion(UseDirectWrite); + Memo.Close(); + OpenReq = false; + TSorter::Sort(direct); + } else { + TSorter::SortPortion(); + } + } + + const TVal* Next() { + return Opened ? TSorter::Next() : TSorter::Nextp(); + } + + bool GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const { + return NMicroBDB::GetExtInfo(Current(), extInfo); + } + + const ui8* GetExtInfoRaw(size_t* len) const { + return NMicroBDB::GetExtInfoRaw(Current(), len); + } + + const TVal* Current() const { + return Opened ? TSorter::Current() : TSorter::Currentp(); + } + + int NextPortion() { + OpenIfNeeded(); + return TSorter::NextPortion(UseDirectWrite); + } + + void SortToFile(const char* name) { + OpenIfNeeded(); + TSorter::NextPortion(UseDirectWrite); + Memo.Close(); + OpenReq = false; + TSorter::SortToFile(name); + } + + void SortToStream(TAutoPtr<IOutputStream> output) { + OpenIfNeeded(); + TSorter::NextPortion(UseDirectWrite); + Memo.Close(); + OpenReq = false; + TSorter::SortToStream(output); + } + + template <typename TKey, typename TOutCompress> + void SortToDirectFile(const char* name, size_t ipagesize, size_t ipages) { + Sort(); + TOutDirectFile<TVal, TKey, TOutCompress> out(TSorter::Name, TSorter::PageSize, TSorter::Pages, ipagesize, ipages, TSorter::PagesOrBytes); + out.Open(name); + while (const TVal* rec = Next()) + out.PushWithExtInfo(rec); + out.Close(); + } + + template <typename TKey> + void SortToDirectFile(const char* name, size_t ipagesize, size_t ipages) { + SortToDirectFile<TKey, TCompress>(name, ipagesize, ipages); + } + + void CloseSorter() { + if (Opened) + TSorter::Close(); + else + TSorter::Closep(); + Memo.Freeze(); + Opened = false; + } + + void Close() { + if (Opened) + TSorter::Close(); + else + TSorter::Closep(); + Memo.Close(); + OpenReq = false; + Opened = false; + } + + int SavePortions(const char* mask) { + return 
TSorter::SavePortions(mask, UseDirectWrite); + } + +public: + using TSorter::RestorePortions; +}; + +template <typename TVal, template <typename T> class TCompare, typename TCompress = TFakeCompression, + typename TSieve = TFakeSieve<TVal>, class TPageFile = TOutputPageFile, class TFileTypes = TDefInterFileTypes> +class TDatSorterMemoArray: public TSorterArray<TDatSorterMemo<TVal, TCompare, TCompress, TSieve, TPageFile, TFileTypes>> { +public: + typedef TSorterArray<TDatSorterMemo<TVal, TCompare, TCompress, TSieve, TPageFile, TFileTypes>> TBase; + + TDatSorterMemoArray(const char* name, size_t memory, size_t pagesize, size_t pages, int pagesOrBytes = 1) + : TBase(name, memory, pagesize, pages, pagesOrBytes) + { + } +}; + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif diff --git a/library/cpp/microbdb/sorter.h b/library/cpp/microbdb/sorter.h new file mode 100644 index 0000000000..b2e7390377 --- /dev/null +++ b/library/cpp/microbdb/sorter.h @@ -0,0 +1,677 @@ +#pragma once + +#include <util/ysaveload.h> +#include <util/generic/algorithm.h> +#include <contrib/libs/libc_compat/include/link/link.h> + +#include "header.h" +#include "heap.h" +#include "extinfo.h" +#include "input.h" +#include "output.h" + +#ifdef TEST_MERGE +#define MBDB_SORT_FUN ::StableSort +#else +#define MBDB_SORT_FUN ::Sort +#endif + +template <class TVal, class TCompare, typename TCompress, typename TSieve, typename TOutPageFile, typename TFileTypes> +class TDatSorterImpl; + +template <class TVal> +struct TFakeSieve { + static inline int Sieve(TVal*, const TVal*) noexcept { + return 0; + } +}; + +template <class TSieve> +struct TIsSieveFake { + static const bool Result = false; +}; + +template <class T> +struct TIsSieveFake<TFakeSieve<T>> { + static const bool Result = true; +}; + +class TDefInterFileTypes { +public: + typedef TOutputPageFile TOutPageFile; + typedef TInputPageFile TInPageFile; +}; + +//class TCompressedInterFileTypes; + +template <class TVal, class TCompare, typename TCompress, 
typename TSieve, typename TOutPageFile = TOutputPageFile, typename TFileTypes = TDefInterFileTypes> +class TDatSorterImplBase: protected THeapIter<TVal, TInDatFileImpl<TVal, TInputRecordIterator<TVal, TInputPageIterator<typename TFileTypes::TInPageFile>>>, TCompare> { + typedef TOutputRecordIterator<TVal, TOutputPageIterator<typename TFileTypes::TOutPageFile>, TFakeIndexer, TCompress> TTmpRecIter; + typedef TInputRecordIterator<TVal, TInputPageIterator<typename TFileTypes::TInPageFile>> TInTmpRecIter; + +public: + typedef TOutDatFileImpl<TVal, TTmpRecIter> TTmpOut; + typedef TInDatFileImpl<TVal, TInTmpRecIter> TTmpIn; + + typedef TOutDatFileImpl<TVal, TOutputRecordIterator<TVal, TOutputPageIterator<TOutPageFile>, TFakeIndexer, TCompress>> TOut; + typedef THeapIter<TVal, TTmpIn, TCompare> TMyHeap; + typedef TVector<const TVal*> TMyVector; + typedef typename TMyVector::iterator TMyIterator; + + class IPortionSorter { + public: + virtual ~IPortionSorter() { + } + + virtual void Sort(TMyVector&, TTmpOut*) = 0; + }; + + class TDefaultSorter: public IPortionSorter { + public: + void Sort(TMyVector& vector, TTmpOut* out) override { + MBDB_SORT_FUN(vector.begin(), vector.end(), TCompare()); + + const typename TMyVector::const_iterator + end = (TIsSieveFake<TSieve>::Result) ? 
vector.end() : TDatSorterImplBase::SieveRange(vector.begin(), vector.end()); + + for (typename TMyVector::const_iterator it = vector.begin(); it != end; ++it) { + out->PushWithExtInfo(*it); + } + } + }; + + class TSegmentedSorter: public IPortionSorter { + class TAdaptor { + typedef typename TMyVector::const_iterator TConstIterator; + + public: + TAdaptor(TConstIterator b, TConstIterator e) + : Curr_(b) + , End_(e) + { + --Curr_; + } + + inline const TVal* Current() const { + return *Curr_; + } + + inline const TVal* Next() { + ++Curr_; + + if (Curr_ == End_) { + return nullptr; + } + + return *Curr_; + } + + private: + TConstIterator Curr_; + TConstIterator End_; + }; + + typedef THeapIter<TVal, TAdaptor, TCompare> TPortionsHeap; + + public: + void Sort(TMyVector& vector, TTmpOut* out) override { + TVector<TAdaptor> bounds; + typename TMyVector::iterator + it = vector.begin(); + const size_t portions = Max<size_t>(1, (vector.size() * sizeof(TVal)) / (4 << 20)); + const size_t step = vector.size() / portions; + + // Sort segments + while (it != vector.end()) { + const typename TMyVector::iterator + end = Min(it + step, vector.end()); + + MBDB_SORT_FUN(it, end, TCompare()); + + bounds.push_back(TAdaptor(it, end)); + + it = end; + } + + // + // Merge result + // + + TPortionsHeap heap(bounds); + + if (TIsSieveFake<TSieve>::Result) { + while (const TVal* val = heap.Next()) { + out->PushWithExtInfo(val); + } + } else { + const TVal* val = heap.Next(); + const TVal* prev = out->PushWithExtInfo(val); + + for (val = heap.Next(); val && prev; val = heap.Next()) { + if (TSieve::Sieve((TVal*)prev, val)) { + continue; + } + + prev = out->PushWithExtInfo(val); + } + + if (prev) { + TSieve::Sieve((TVal*)prev, prev); + } + } + } + }; + +public: + TDatSorterImplBase() + : Sorter(new TDefaultSorter) + { + InFiles = nullptr; + TempBuf = nullptr; + Ptr = Vector.end(); + Cur = nullptr; + Portions = CPortions = Error = 0; + } + + ~TDatSorterImplBase() { + Close(); + } + + int 
Open(const char* templ, size_t pagesize, size_t pages, int pagesOrBytes = 1) { + Portions = CPortions = Error = 0; + TempBuf = strdup(templ); + Pagesize = pagesize; + if (pagesOrBytes) + Pages = pages; + else + Pages = pages / pagesize; + Pages = Max(1, Pages); + return 0; + } + + void Push(const TVal* v) { + // Serialized extInfo must follow a record being pushed, therefore, to avoid + // unintentional misusage (as if when you are adding TExtInfo in your record + // type: you may forget to check your sorting routines and get a segfault as + // a result). + // PushWithExtInfo(v) should be called on records with extInfo. + static_assert(!TExtInfoType<TVal>::Exists, "expect !TExtInfoType<TVal>::Exists"); + + Vector.push_back(v); + } + + void PushWithExtInfo(const TVal* v) { + Vector.push_back(v); + } + + int SortPortion() { + Ptr = Vector.end(); + Cur = nullptr; + if (!Vector.size() || Error) + return Error; + + MBDB_SORT_FUN(Vector.begin(), Vector.end(), TCompare()); + + if (!TIsSieveFake<TSieve>::Result) { + const typename TMyVector::iterator + end = SieveRange(Vector.begin(), Vector.end()); + + Vector.resize(end - Vector.begin()); + } + + Ptr = Vector.begin(); + Cur = nullptr; + return 0; + } + + const TVal* Nextp() { + Cur = Ptr == Vector.end() ? nullptr : *Ptr++; + return Cur; + } + + const TVal* Currentp() const { + return Cur; + } + + void Closep() { + Vector.clear(); + Ptr = Vector.end(); + Cur = nullptr; + } + + int NextPortion(bool direct = false) { + if (!Vector.size() || Error) + return Error; + + TTmpOut out; + int ret, ret1; + char fname[FILENAME_MAX]; + + snprintf(fname, sizeof(fname), TempBuf, Portions++); + if ((ret = out.Open(fname, Pagesize, Pages, 1, direct))) + return Error = ret; + + Sorter->Sort(Vector, &out); + + Vector.erase(Vector.begin(), Vector.end()); + ret = out.GetError(); + ret1 = out.Close(); + Error = Error ? Error : ret ? 
ret : ret1; + if (Error) + unlink(fname); + return Error; + } + + int SavePortions(const char* mask, bool direct = false) { + char srcname[PATH_MAX], dstname[PATH_MAX]; + if (Vector.size()) + NextPortion(direct); + for (int i = 0; i < Portions; i++) { + char num[10]; + sprintf(num, "%i", i); + snprintf(srcname, sizeof(srcname), TempBuf, i); + snprintf(dstname, sizeof(dstname), mask, num); + int res = rename(srcname, dstname); + if (res) + return res; + } + snprintf(dstname, sizeof(dstname), mask, "count"); + TOFStream fcount(dstname); + Save(&fcount, Portions); + fcount.Finish(); + return 0; + } + + int RestorePortions(const char* mask) { + char srcname[PATH_MAX], dstname[PATH_MAX]; + snprintf(srcname, sizeof(srcname), mask, "count"); + TIFStream fcount(srcname); + Load(&fcount, Portions); + for (int i = 0; i < Portions; i++) { + char num[10]; + sprintf(num, "%i", i); + snprintf(dstname, sizeof(dstname), TempBuf, i); + snprintf(srcname, sizeof(srcname), mask, num); + unlink(dstname); + int res = link(srcname, dstname); + if (res) + return res; + } + return 0; + } + + int RestorePortions(const char* mask, ui32 count) { + char srcname[PATH_MAX], dstname[PATH_MAX]; + ui32 portions; + TVector<ui32> counts; + for (ui32 j = 0; j < count; j++) { + snprintf(srcname, sizeof(srcname), mask, j, "count"); + TIFStream fcount(srcname); + Load(&fcount, portions); + counts.push_back(portions); + Portions += portions; + } + ui32 p = 0; + for (ui32 j = 0; j < count; j++) { + int cnt = counts[j]; + for (int i = 0; i < cnt; i++, p++) { + char num[10]; + sprintf(num, "%i", i); + snprintf(dstname, sizeof(dstname), TempBuf, p); + snprintf(srcname, sizeof(srcname), mask, j, num); + unlink(dstname); + int res = link(srcname, dstname); + if (res) { + fprintf(stderr, "Can not link %s to %s\n", srcname, dstname); + return res; + } + } + } + return 0; + } + + int Sort(size_t memory, int maxportions = 1000, bool direct = false) { + int ret, end, beg, i; + char fname[FILENAME_MAX]; + + if 
(Vector.size()) + NextPortion(); + + if (Error) + return Error; + if (!Portions) { + TMyHeap::Init(&DummyFile, 1); // closed file + HPages = 1; + return 0; + } + + Optimize(memory, maxportions); + if (!(InFiles = new TTmpIn[MPortions])) + return MBDB_NO_MEMORY; + + for (beg = 0; beg < Portions && !Error; beg = end) { + end = (int)Min(beg + FPortions, Portions); + for (i = beg; i < end && !Error; i++) { + snprintf(fname, sizeof(fname), TempBuf, i); + if ((ret = InFiles[i - beg].Open(fname, HPages, 1, nullptr, direct))) + Error = Error ? Error : ret; + } + if (Error) + return Error; + TMyHeap::Init(InFiles, end - beg); + if (end != Portions) { + TTmpOut out; + const TVal* v; + snprintf(fname, sizeof(fname), TempBuf, Portions++); + if ((ret = out.Open(fname, Pagesize, HPages))) + return Error = Error ? Error : ret; + while ((v = TMyHeap::Next())) + out.PushWithExtInfo(v); + ret = out.GetError(); + Error = Error ? Error : ret; + ret = out.Close(); + Error = Error ? Error : ret; + for (i = beg; i < end; i++) { + ret = InFiles[i - beg].Close(); + Error = Error ? 
Error : ret; + snprintf(fname, sizeof(fname), TempBuf, CPortions++); + unlink(fname); + } + } + FPortions = MPortions; + } + return Error; + } + + int Close() { + char fname[FILENAME_MAX]; + delete[] InFiles; + InFiles = nullptr; + Closep(); + for (int i = CPortions; i < Portions; i++) { + snprintf(fname, sizeof(fname), TempBuf, i); + unlink(fname); + } + CPortions = Portions = 0; + free(TempBuf); + TempBuf = nullptr; + return Error; + } + + void UseSegmentSorter() { + Sorter.Reset(new TSegmentedSorter); + } + + inline int GetError() const { + return Error; + } + + inline int GetPages() const { + return Pages; + } + + inline int GetPageSize() const { + return Pagesize; + } + +private: + static TMyIterator SieveRange(const TMyIterator begin, const TMyIterator end) { + TMyIterator it = begin; + TMyIterator prev = begin; + + for (++it; it != end; ++it) { + if (TSieve::Sieve((TVal*)*prev, *it)) { + continue; + } + + ++prev; + + if (it != prev) { + *prev = *it; + } + } + + TSieve::Sieve((TVal*)*prev, *prev); + + return ++prev; + } + +protected: + void Optimize(size_t memory, int maxportions, size_t fbufmax = 256u << 20) { + maxportions = (int)Min((size_t)maxportions, memory / Pagesize) - 1; + size_t maxpages = Max((size_t)1u, fbufmax / Pagesize); + + if (maxportions <= 2) { + FPortions = MPortions = 2; + HPages = 1; + return; + } + if (maxportions >= Portions) { + FPortions = MPortions = Portions; + HPages = (int)Min(memory / ((Portions + 1) * Pagesize), maxpages); + return; + } + if (((Portions + maxportions - 1) / maxportions) <= maxportions) { + while (((Portions + maxportions - 1) / maxportions) <= maxportions) + --maxportions; + MPortions = ++maxportions; + int total = ((Portions + maxportions - 1) / maxportions) + Portions; + FPortions = (total % maxportions) ? 
(total % maxportions) : MPortions; + HPages = (int)Min(memory / ((MPortions + 1) * Pagesize), maxpages); + return; + } + FPortions = MPortions = maxportions; + HPages = (int)Min(memory / ((MPortions + 1) * Pagesize), maxpages); + } + + TMyVector Vector; + typename TMyVector::iterator Ptr; + const TVal* Cur; + TTmpIn *InFiles, DummyFile; + char* TempBuf; + int Portions, CPortions, Pagesize, Pages, Error; + int FPortions, MPortions, HPages; + THolder<IPortionSorter> Sorter; +}; + +template <class TVal, class TCompare, typename TCompress> +class TDatSorterImpl<TVal, TCompare, TCompress, TFakeSieve<TVal>, TOutputPageFile, TDefInterFileTypes> + : public TDatSorterImplBase<TVal, TCompare, TCompress, TFakeSieve<TVal>, TOutputPageFile, TDefInterFileTypes> { + typedef TDatSorterImplBase<TVal, TCompare, TCompress, TFakeSieve<TVal>, TOutputPageFile, TDefInterFileTypes> TBase; + +public: + int SortToFile(const char* name, size_t memory, int maxportions = 1000) { + int ret = TBase::Sort(memory, maxportions); + if (ret) + return ret; + typename TBase::TOut out; + if ((ret = out.Open(name, TBase::Pagesize, TBase::HPages))) + return ret; + const TVal* rec; + while ((rec = Next())) + out.PushWithExtInfo(rec); + if ((ret = out.GetError())) + return ret; + if ((ret = out.Close())) + return ret; + if ((ret = TBase::Close())) + return ret; + return 0; + } + + int SortToStream(TAutoPtr<IOutputStream> output, size_t memory, int maxportions = 1000) { + int ret = TBase::Sort(memory, maxportions); + if (ret) + return ret; + typename TBase::TOut out; + if ((ret = out.Open(output, TBase::Pagesize, TBase::HPages))) + return ret; + const TVal* rec; + while ((rec = Next())) + out.PushWithExtInfo(rec); + if ((ret = out.GetError())) + return ret; + if ((ret = out.Close())) + return ret; + if ((ret = TBase::Close())) + return ret; + return 0; + } + + const TVal* Next() { + return TBase::TMyHeap::Next(); + } + + const TVal* Current() const { + return TBase::TMyHeap::Current(); + } + + bool 
GetExtInfo(typename TExtInfoType<TVal>::TResult* extInfo) const { + return TBase::TMyHeap::GetExtInfo(extInfo); + } + + const ui8* GetExtInfoRaw(size_t* len) const { + return TBase::TMyHeap::GetExtInfoRaw(len); + } +}; + +template <class TVal, class TCompare, typename TCompress, typename TSieve, + typename TOutPageFile = TOutputPageFile, typename TFileTypes = TDefInterFileTypes> +class TDatSorterImpl: public TDatSorterImplBase<TVal, TCompare, TCompress, TSieve, TOutPageFile, TFileTypes> { + typedef TDatSorterImplBase<TVal, TCompare, TCompress, TSieve, TOutPageFile, TFileTypes> TBase; + +public: + TDatSorterImpl() + : Cur(nullptr) + , Prev(nullptr) + { + } + + int SortToFile(const char* name, size_t memory, int maxportions = 1000) { + int ret = Sort(memory, maxportions); + if (ret) + return ret; + typename TBase::TOut out; + if ((ret = out.Open(name, TBase::Pagesize, TBase::HPages))) + return ret; + const TVal* rec; + while ((rec = Next())) + out.PushWithExtInfo(rec); + if ((ret = out.GetError())) + return ret; + if ((ret = out.Close())) + return ret; + if ((ret = TBase::Close())) + return ret; + return 0; + } + + int SortToStream(TAutoPtr<IOutputStream> output, size_t memory, int maxportions = 1000) { + int ret = Sort(memory, maxportions); + if (ret) + return ret; + typename TBase::TOut out; + if ((ret = out.Open(output, TBase::Pagesize, TBase::HPages))) + return ret; + const TVal* rec; + while ((rec = Next())) + out.PushWithExtInfo(rec); + if ((ret = out.GetError())) + return ret; + if ((ret = out.Close())) + return ret; + if ((ret = TBase::Close())) + return ret; + return 0; + } + + int Open(const char* templ, size_t pagesize, size_t pages, int pagesOrBytes = 1) { + int res = TBase::Open(templ, pagesize, pages, pagesOrBytes); + Prev = nullptr; + Cur = nullptr; + return res; + } + + int Sort(size_t memory, int maxportions = 1000, bool direct = false) { + int res = TBase::Sort(memory, maxportions, direct); + if (!res) { + const TVal* rec = TBase::TMyHeap::Next(); + 
if (rec) { + size_t els, es; + size_t sz = NMicroBDB::SizeOfExt(rec, &els, &es); + sz += els + es; + if (!TExtInfoType<TVal>::Exists) + Cur = (TVal*)malloc(sizeof(TVal)); + else + Cur = (TVal*)malloc(TBase::Pagesize); + memcpy(Cur, rec, sz); + } + } + return res; + } + + // Prev = last returned + // Cur = current accumlating with TSieve + + const TVal* Next() { + if (!Cur) { + if (Prev) { + free(Prev); + Prev = nullptr; + } + return nullptr; + } + const TVal* rec; + + if (TIsSieveFake<TSieve>::Result) + rec = TBase::TMyHeap::Next(); + else { + do { + rec = TBase::TMyHeap::Next(); + } while (rec && TSieve::Sieve((TVal*)Cur, rec)); + } + + if (!Prev) { + if (!TExtInfoType<TVal>::Exists) + Prev = (TVal*)malloc(sizeof(TVal)); + else + Prev = (TVal*)malloc(TBase::Pagesize); + } + size_t els, es; + size_t sz = NMicroBDB::SizeOfExt(Cur, &els, &es); + sz += els + es; + memcpy(Prev, Cur, sz); + + if (rec) { + sz = NMicroBDB::SizeOfExt(rec, &els, &es); + sz += els + es; + memcpy(Cur, rec, sz); + } else { + TSieve::Sieve((TVal*)Cur, Cur); + free(Cur); + Cur = nullptr; + } + return Prev; + } + + const TVal* Current() const { + return Prev; + } + + int Close() { + int res = TBase::Close(); + if (Prev) { + free(Prev); + Prev = nullptr; + } + if (Cur) { + free(Cur); + Cur = nullptr; + } + return res; + } + +protected: + TVal* Cur; + TVal* Prev; +}; diff --git a/library/cpp/microbdb/sorterdef.h b/library/cpp/microbdb/sorterdef.h new file mode 100644 index 0000000000..8834b5fff8 --- /dev/null +++ b/library/cpp/microbdb/sorterdef.h @@ -0,0 +1,19 @@ +#pragma once + +#define MAKESORTERTMPL(TRecord, MemberFunc) \ + template <typename T> \ + struct MemberFunc; \ + template <> \ + struct MemberFunc<TRecord> { \ + bool operator()(const TRecord* l, const TRecord* r) { \ + return TRecord ::MemberFunc(l, r) < 0; \ + } \ + int operator()(const TRecord* l, const TRecord* r, int) { \ + return TRecord ::MemberFunc(l, r); \ + } \ + } + +template <typename T> +static inline int compare(const T& a, 
const T& b) { + return (a < b) ? -1 : (a > b); +} diff --git a/library/cpp/microbdb/utility.h b/library/cpp/microbdb/utility.h new file mode 100644 index 0000000000..5c86061bca --- /dev/null +++ b/library/cpp/microbdb/utility.h @@ -0,0 +1,75 @@ +#pragma once + +#include "microbdb.h" + +template <class TRecord, template <class T> class TCompare> +int SortData(const TFile& ifile, const TFile& ofile, const TDatMetaPage* meta, size_t memory, const char* tmpDir = nullptr) { + char templ[FILENAME_MAX]; + TInDatFileImpl<TRecord> datin; + TOutDatFileImpl<TRecord> datout; + TDatSorterImpl<TRecord, TCompare<TRecord>, TFakeCompression, TFakeSieve<TRecord>> sorter; + const TRecord* u; + int ret; + + const size_t minMemory = (2u << 20); + memory = Max(memory, minMemory + minMemory / 2); + if (datin.Open(ifile, meta, memory - minMemory, 0)) + err(1, "can't read input file"); + + size_t outpages = Max((size_t)2u, minMemory / datin.GetPageSize()); + memory -= outpages * datin.GetPageSize(); + + if (ret = MakeSorterTempl(templ, tmpDir)) + err(1, "can't create tempdir in \"%s\"; error: %d\n", templ, ret); + + if (sorter.Open(templ, datin.GetPageSize(), outpages)) { + *strrchr(templ, LOCSLASH_C) = 0; + RemoveDirWithContents(templ); + err(1, "can't open sorter"); + } + + while (1) { + datin.Freeze(); + while ((u = datin.Next())) + sorter.PushWithExtInfo(u); + sorter.NextPortion(); + if (datin.GetError() || datin.IsEof()) + break; + } + + if (datin.GetError()) { + *strrchr(templ, LOCSLASH_C) = 0; + RemoveDirWithContents(templ); + err(1, "in data file error %d", datin.GetError()); + } + if (datin.Close()) { + *strrchr(templ, LOCSLASH_C) = 0; + RemoveDirWithContents(templ); + err(1, "can't close in data file"); + } + + sorter.Sort(memory); + + if (datout.Open(ofile, datin.GetPageSize(), outpages)) { + *strrchr(templ, LOCSLASH_C) = 0; + RemoveDirWithContents(templ); + err(1, "can't write out file"); + } + + while ((u = sorter.Next())) + datout.PushWithExtInfo(u); + + if 
(sorter.GetError()) + err(1, "sorter error %d", sorter.GetError()); + if (sorter.Close()) + err(1, "can't close sorter"); + + *strrchr(templ, LOCSLASH_C) = 0; + RemoveDirWithContents(templ); + + if (datout.GetError()) + err(1, "out data file error %d", datout.GetError()); + if (datout.Close()) + err(1, "can't close out data file"); + return 0; +} diff --git a/library/cpp/microbdb/wrappers.h b/library/cpp/microbdb/wrappers.h new file mode 100644 index 0000000000..38eb8edebc --- /dev/null +++ b/library/cpp/microbdb/wrappers.h @@ -0,0 +1,637 @@ +#pragma once + +#include "microbdb.h" + +#define MAKEFILTERTMPL(TRecord, MemberFunc, NS) \ + template <typename T> \ + struct MemberFunc; \ + template <> \ + struct MemberFunc<TRecord> { \ + bool operator()(const TRecord* r) { \ + return NS::MemberFunc(r); \ + } \ + } + +#define MAKEJOINTMPL(TRecordA, TRecordB, MemberFunc, NS, TMergeType) \ + template <typename A, typename B> \ + struct MemberFunc; \ + template <> \ + struct MemberFunc<TRecordA, TRecordB> { \ + int operator()(const TRecordA* l, const TRecordB* r) { \ + return NS::MemberFunc(l, r); \ + } \ + }; \ + typedef TMergeRec<TRecordA, TRecordB> TMergeType + +#define MAKEJOINTMPL2(TRecordA, TRecordB, MemberFunc, StructName, TMergeType) \ + template <typename A, typename B> \ + struct StructName; \ + template <> \ + struct StructName<TRecordA, TRecordB> { \ + int operator()(const TRecordA* l, const TRecordB* r) { \ + return MemberFunc(l, r); \ + } \ + }; \ + typedef TMergeRec<TRecordA, TRecordB> TMergeType + +#define MAKEJOINTMPLLEFT(TRecordA, TRecordB, MemberFunc, NS, TMergeType) \ + template <typename A, typename B> \ + struct MemberFunc; \ + template <> \ + struct MemberFunc<TRecordA, TRecordB> { \ + int operator()(const TRecordA* l, const TRecordB* r) { \ + return NS::MemberFunc(l->RecA, r); \ + } \ + }; \ + typedef TMergeRec<TRecordA, TRecordB> TMergeType + +template <class TRec> +class IDatNextSource { +public: + virtual const TRec* Next() = 0; + virtual void Work() 
{ + } +}; + +template <class TRec> +class IDatNextReceiver { +public: + IDatNextReceiver(IDatNextSource<TRec>& source) + : Source(source) + { + } + + virtual void Work() { + Source.Work(); + } + +protected: + IDatNextSource<TRec>& Source; +}; + +template <class TInRec, class TOutRec> +class IDatNextChannel: public IDatNextReceiver<TInRec>, public IDatNextSource<TOutRec> { +public: + IDatNextChannel(IDatNextSource<TInRec>& source) + : IDatNextReceiver<TInRec>(source) + { + } + + virtual void Work() { + IDatNextReceiver<TInRec>::Work(); + } +}; + +class IDatWorker { +public: + virtual void Work() = 0; +}; + +template <class TRec> +class IDatPushReceiver { +public: + virtual void Push(const TRec* rec) = 0; + virtual void Work() = 0; +}; + +template <class TRec> +class IDatPushSource { +public: + IDatPushSource(IDatPushReceiver<TRec>& receiver) + : Receiver(receiver) + { + } + + virtual void Work() { + Receiver.Work(); + } + +protected: + IDatPushReceiver<TRec>& Receiver; +}; + +template <class TInRec, class TOutRec> +class IDatPushChannel: public IDatPushReceiver<TInRec>, public IDatPushSource<TOutRec> { +public: + IDatPushChannel(IDatPushReceiver<TOutRec>& receiver) + : IDatPushSource<TOutRec>(receiver) + { + } + + virtual void Work() { + IDatPushSource<TOutRec>::Work(); + } +}; + +template <class TRec> +class IDatNextToPush: public IDatNextReceiver<TRec>, public IDatPushSource<TRec> { + typedef IDatNextReceiver<TRec> TNextReceiver; + typedef IDatPushSource<TRec> TPushSource; + +public: + IDatNextToPush(IDatNextSource<TRec>& source, IDatPushReceiver<TRec>& receiver) + : TNextReceiver(source) + , TPushSource(receiver) + { + } + + virtual void Work() { + const TRec* rec; + while (rec = TNextReceiver::Source.Next()) + TPushSource::Receiver.Push(rec); + TPushSource::Work(); + TNextReceiver::Work(); + } +}; + +template <class TRec> +class TDatNextPNSplitter: public IDatNextReceiver<TRec>, public IDatNextSource<TRec>, public IDatPushSource<TRec> { +public: + 
TDatNextPNSplitter(IDatNextSource<TRec>& source, IDatPushReceiver<TRec>& receiver) + : IDatNextReceiver<TRec>(source) + , IDatNextSource<TRec>() + , IDatPushSource<TRec>(receiver) + { + } + + const TRec* Next() { + const TRec* rec = IDatNextReceiver<TRec>::Source.Next(); + if (rec) { + IDatPushSource<TRec>::Receiver.Push(rec); + return rec; + } else { + return 0; + } + } + + virtual void Work() { + IDatNextReceiver<TRec>::Work(); + IDatPushSource<TRec>::Work(); + } +}; + +template <class TRec, class TOutRecA = TRec, class TOutRecB = TRec> +class TDatPushPPSplitter: public IDatPushReceiver<TRec>, public IDatPushSource<TOutRecA>, public IDatPushSource<TOutRecB> { +public: + TDatPushPPSplitter(IDatPushReceiver<TOutRecA>& receiverA, IDatPushReceiver<TOutRecB>& receiverB) + : IDatPushSource<TOutRecA>(receiverA) + , IDatPushSource<TOutRecB>(receiverB) + { + } + + void Push(const TRec* rec) { + IDatPushSource<TOutRecA>::Receiver.Push(rec); + IDatPushSource<TOutRecB>::Receiver.Push(rec); + } + + void Work() { + IDatPushSource<TOutRecA>::Work(); + IDatPushSource<TOutRecB>::Work(); + } +}; + +template <class TRec> +class TFastInDatFile: public TInDatFile<TRec>, public IDatNextSource<TRec> { +public: + typedef TInDatFile<TRec> Base; + + TFastInDatFile(const char* name, bool open = true, size_t pages = dbcfg::fbufsize, int pagesOrBytes = 0) + : TInDatFile<TRec>(name, pages, pagesOrBytes) + , FileName(name) + { + if (open) + Base::Open(name); + } + + void Open() { + Base::Open(FileName); + } + + template <class TPassRec> + bool PassToUid(const TRec* inrec, const TPassRec* torec) { + inrec = Base::Current(); + while (inrec && CompareUids(inrec, torec) < 0) + inrec = Base::Next(); + return (inrec && CompareUids(inrec, torec) == 0); + } + + void Work() { + Base::Close(); + } + + const TRec* Next() { + return Base::Next(); + } + +private: + TString FileName; +}; + +template <class TRec> +class TPushOutDatFile: public TOutDatFile<TRec>, public IDatPushReceiver<TRec> { +public: + 
typedef TOutDatFile<TRec> Base; + + TPushOutDatFile(const char* name, bool open = true) + : Base(name, dbcfg::pg_docuid, dbcfg::fbufsize, 0) + , FileName(name) + { + if (open) + Base::Open(name); + } + + void Open() { + Base::Open(~FileName); + } + + void Push(const TRec* rec) { + Base::Push(rec); + } + + void Work() { + Base::Close(); + } + +private: + TString FileName; +}; + +template <class TRec> +class TNextOutDatFile: public IDatNextToPush<TRec> { +public: + typedef IDatNextToPush<TRec> TBase; + + TNextOutDatFile(const char* name, IDatNextSource<TRec>& source, bool open = true) + : TBase(source, File) + , File(name, open) + { + } + + void Open() { + File.Open(); + } + +private: + TPushOutDatFile<TRec> File; +}; + +template <class TVal, template <typename T> class TCompare> +class TNextDatSorterMemo: public TDatSorterMemo<TVal, TCompare>, public IDatNextChannel<TVal, TVal> { + typedef TDatSorterMemo<TVal, TCompare> TImpl; + +public: + TNextDatSorterMemo(IDatNextSource<TVal>& source, const char* dir = dbcfg::fname_temp, const char* name = "yet another sorter", size_t memory = dbcfg::small_sorter_size, size_t pagesize = dbcfg::pg_docuid, size_t pages = dbcfg::fbufsize, int pagesOrBytes = 0) + : TImpl(name, memory, pagesize, pages, pagesOrBytes) + , IDatNextChannel<TVal, TVal>(source) + , Sorted(false) + { + TImpl::Open(dir); + } + + void Sort() { + const TVal* rec; + while (rec = IDatNextChannel<TVal, TVal>::Source.Next()) { + TImpl::Push(rec); + } + TImpl::Sort(); + Sorted = true; + } + + const TVal* Next() { + if (!Sorted) + Sort(); + return TImpl::Next(); + } + +private: + bool Sorted; + TString Dir; +}; + +template <class TInRec, class TOutRec> +class TDatConverter: public IDatNextChannel<TInRec, TOutRec> { +public: + TDatConverter(IDatNextSource<TInRec>& source) + : IDatNextChannel<TInRec, TOutRec>(source) + { + } + + virtual void Convert(const TInRec& inrec, TOutRec& outrec) { + outrec(inrec); + } + + const TOutRec* Next() { + const TInRec* rec = 
IDatNextChannel<TInRec, TOutRec>::Source.Next(); + if (!rec) + return 0; + Convert(*rec, CurrentRec); + return &CurrentRec; + } + +private: + TOutRec CurrentRec; +}; + +template <class TRecA, class TRecB> +class TMergeRec { +public: + const TRecA* RecA; + const TRecB* RecB; +}; + +enum NMergeTypes { + MT_JOIN = 0, + MT_ADD = 1, + MT_OVERWRITE = 2, + MT_TYPENUM +}; + +template <class TRecA, class TRecB, template <typename TA, typename TB> class TCompare> +class TNextDatMerger: public IDatNextReceiver<TRecA>, public IDatNextReceiver<TRecB>, public IDatNextSource<TMergeRec<TRecA, TRecB>> { +public: + TNextDatMerger(IDatNextSource<TRecA>& sourceA, IDatNextSource<TRecB>& sourceB, ui8 mergeType) + : IDatNextReceiver<TRecA>(sourceA) + , IDatNextReceiver<TRecB>(sourceB) + , MergeType(mergeType) + , MoveA(false) + , MoveB(false) + , NotInit(true) + { + } + + const TMergeRec<TRecA, TRecB>* Next() { + if (MoveA || NotInit) + SourceARec = IDatNextReceiver<TRecA>::Source.Next(); + if (MoveB || NotInit) + SourceBRec = IDatNextReceiver<TRecB>::Source.Next(); + NotInit = false; + + // Cout << "Next " << SourceARec->HostId << "\t" << SourceBRec->HostId << "\t" << TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) << "\t" << ::compare(SourceARec->HostId, SourceBRec->HostId) << "\t" << ::compare(1, 2) << "\t" << ::compare(2,1) << Endl; + if (MergeType == MT_ADD && SourceARec && (!SourceBRec || TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) < 0)) { + MergeRec.RecA = SourceARec; + MergeRec.RecB = 0; + MoveA = true; + MoveB = false; + return &MergeRec; + } + + if (MergeType == MT_ADD && SourceBRec && (!SourceARec || TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) < 0)) { + MergeRec.RecA = 0; + MergeRec.RecB = SourceBRec; + MoveA = false; + MoveB = true; + return &MergeRec; + } + + if (MergeType == MT_ADD && SourceARec && SourceBRec && TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) == 0) { + MergeRec.RecA = SourceARec; + MergeRec.RecB = SourceBRec; + MoveA = true; + MoveB = true; + 
return &MergeRec; + } + + while (MergeType == MT_JOIN && SourceARec && SourceBRec && TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) != 0) { + while (SourceARec && TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) < 0) { + SourceARec = IDatNextReceiver<TRecA>::Source.Next(); + } + while (SourceARec && SourceBRec && TCompare<TRecA, TRecB>()(SourceARec, SourceBRec) > 0) { + SourceBRec = IDatNextReceiver<TRecB>::Source.Next(); + } + } + + if (MergeType == MT_JOIN && SourceARec && SourceBRec) { + MergeRec.RecA = SourceARec; + MergeRec.RecB = SourceBRec; + MoveA = true; + MoveB = true; + return &MergeRec; + } + + MergeRec.RecA = 0; + MergeRec.RecB = 0; + return 0; + } + + void Work() { + IDatNextReceiver<TRecA>::Source.Work(); + IDatNextReceiver<TRecB>::Source.Work(); + } + +private: + TMergeRec<TRecA, TRecB> MergeRec; + const TRecA* SourceARec; + const TRecB* SourceBRec; + ui8 MergeType; + bool MoveA; + bool MoveB; + bool NotInit; +}; + +/*template<class TRec, class TSource, template <typename T> class TCompare, class TReceiver = TPushOutDatFile<TRec> > +class TPushDatMerger { +public: + TPushDatMerger(TSource& source, TReceiver& receiver, ui8 mergeType) + : Source(source) + , Receiver(receiver) + , MergeType(mergeType) + { + } + + virtual void Init() { + SourceRec = Source.Next(); + } + + virtual void Push(const TRec* rec) { + while (SourceRec && TCompare<TRec>()(SourceRec, rec, 0) < 0) { + if (MergeType == MT_OVERWRITE || MergeType == MT_ADD) + Receiver.Push(SourceRec); + SourceRec = Source.Next(); + } + + bool intersected = false; + while (SourceRec && TCompare<TRec>()(SourceRec, rec, 0) == 0) { + intersected = true; + if (MergeType == MT_ADD) + Receiver.Push(SourceRec); + SourceRec = Source.Next(); + } + + if (intersected && MergeType == MT_JOIN) + Receiver.Push(rec); + + if (MergeType == MT_OVERWRITE || MergeType == MT_ADD) + Receiver.Push(rec); + } + + virtual void Term() { + if (MergeType == MT_OVERWRITE || MergeType == MT_ADD) { + while (SourceRec) { + 
Receiver.Push(SourceRec); + SourceRec = Source.Next(); + } + } + } + +private: + TSource& Source; + const TRec* SourceRec; + TReceiver& Receiver; + ui8 MergeType; +};*/ + +/*template <class TRec, class TSourceA, class TSourceB, template <typename T> class TCompare, class TReceiver = TPushOutDatFile<TRec> > +class TNextDatMerger: public TPushDatMerger<TRec, TSourceA, TCompare, TReceiver> { + typedef TPushDatMerger<TRec, TSourceA, TCompare, TReceiver> TImpl; +public: + TNextDatMerger(TSourceA& sourceA, TSourceB& sourceB, TReceiver& receiver, ui8 mergeType) + : TImpl(sourceA, receiver, mergeType) + , SourceB(sourceB) + { + } + + virtual void Work() { + TImpl::Init(); + while (SourceBRec = SourceB.Next()) { + TImpl::Push(SourceBRec); + } + TImpl::Term(); + } +private: + TSourceB& SourceB; + const TRec* SourceBRec; +};*/ + +/*template <class TRec, template <typename T> class TCompare, class TReceiver = TPushOutDatFile<TRec> > +class TFilePushDatMerger: public TPushDatMerger<TRec, TFastInDatFile<TRec>, TCompare, TReceiver> { + typedef TPushDatMerger<TRec, TFastInDatFile<TRec>, TCompare, TReceiver> TImpl; +public: + TFilePushDatMerger(const char* name, TReceiver& receiver, ui8 mergeType) + : TImpl(SourceFile, receiver, mergeType) + , SourceFile(name) + { + } + + virtual void Push(const TRec* rec) { + TImpl::Push(rec); + } + + virtual void Term() { + TImpl::Term(); + } +private: + TFastInDatFile<TRec> SourceFile; +};*/ + +/*template <class TRec, template <typename T> class TCompare, class TReceiver = TPushOutDatFile<TRec> > +class TFileNextDatMerger: public TNextDatMerger<TRec, TFastInDatFile<TRec>, TFastInDatFile<TRec>, TCompare, TReceiver> { + typedef TNextDatMerger<TRec, TFastInDatFile<TRec>, TFastInDatFile<TRec>, TCompare, TReceiver> TImpl; +public: + TFileNextDatMerger(const char* sourceAname, const char* sourceBname, TReceiver& receiver, ui8 mergeType) + : TImpl(FileA, FileB, receiver, mergeType) + , FileA(sourceAname) + , FileB(sourceBname) + { + } + + virtual void 
Work() { + TImpl::Work(); + } +private: + TFastInDatFile<TRec> FileA; + TFastInDatFile<TRec> FileB; +};*/ + +template <class TRec, template <typename T> class TPredicate> +class TDatNextFilter: public IDatNextChannel<TRec, TRec> { +public: + TDatNextFilter(IDatNextSource<TRec>& source) + : IDatNextChannel<TRec, TRec>(source) + { + } + + virtual const TRec* Next() { + const TRec* rec; + while ((rec = IDatNextChannel<TRec, TRec>::Source.Next()) != 0 && !Check(rec)) { + } + if (!rec) + return 0; + return rec; + } + +protected: + virtual bool Check(const TRec* rec) { + return TPredicate<TRec>()(rec); + } +}; + +template <class TRec, template <typename T> class TPredicate> +class TDatPushFilter: public IDatPushChannel<TRec, TRec> { +public: + TDatPushFilter(IDatPushReceiver<TRec>& receiver) + : IDatPushChannel<TRec, TRec>(receiver) + { + } + + virtual void Push(const TRec* rec) { + if (Check(rec)) + IDatPushChannel<TRec, TRec>::Receiver.Push(rec); + } + +private: + virtual bool Check(const TRec* rec) { + return TPredicate<TRec>()(rec); + } +}; + +template <class TInRec, class TOutRec, template <typename T> class TCompare> +class TDatGrouper: public IDatNextChannel<TInRec, TOutRec> { +public: + TDatGrouper(IDatNextSource<TInRec>& source) + : IDatNextChannel<TInRec, TOutRec>(source) + , Begin(true) + , Finish(false) + , HasOutput(false) + { + } + + const TOutRec* Next() { + while (CurrentRec = IDatNextChannel<TInRec, TOutRec>::Source.Next()) { + int cmp = 0; + if (Begin) { + Begin = false; + OnStart(); + } else if ((cmp = TCompare<TInRec>()(CurrentRec, LastRec, 0)) != 0) { + OnFinish(); + OnStart(); + } + OnRecord(); + LastRec = CurrentRec; + if (HasOutput) { + HasOutput = false; + return &OutRec; + } + } + if (!Finish) + OnFinish(); + Finish = true; + if (HasOutput) { + HasOutput = false; + return &OutRec; + } + return 0; + } + +protected: + virtual void OnStart() = 0; + virtual void OnRecord() = 0; + virtual void OnFinish() = 0; + + const TInRec* CurrentRec; + const 
TInRec* LastRec; + TOutRec OutRec; + + bool Begin; + bool Finish; + bool HasOutput; +}; diff --git a/library/cpp/microbdb/ya.make b/library/cpp/microbdb/ya.make new file mode 100644 index 0000000000..3e553f8535 --- /dev/null +++ b/library/cpp/microbdb/ya.make @@ -0,0 +1,36 @@ +LIBRARY() + +SRCS( + align.h + compressed.h + extinfo.h + file.cpp + hashes.h + header.h + header.cpp + heap.h + input.h + microbdb.cpp + noextinfo.proto + output.h + powersorter.h + reader.h + safeopen.h + sorter.h + sorterdef.h + utility.h + wrappers.h +) + +PEERDIR( + contrib/libs/fastlz + contrib/libs/libc_compat + contrib/libs/protobuf + contrib/libs/snappy + contrib/libs/zlib + library/cpp/deprecated/fgood + library/cpp/on_disk/st_hash + library/cpp/packedtypes +) + +END() diff --git a/library/cpp/on_disk/CMakeLists.txt b/library/cpp/on_disk/CMakeLists.txt index 4202947169..ade3b33c9a 100644 --- a/library/cpp/on_disk/CMakeLists.txt +++ b/library/cpp/on_disk/CMakeLists.txt @@ -7,3 +7,4 @@ add_subdirectory(chunks) +add_subdirectory(st_hash) diff --git a/library/cpp/on_disk/st_hash/CMakeLists.darwin-x86_64.txt b/library/cpp/on_disk/st_hash/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..ad332fef62 --- /dev/null +++ b/library/cpp/on_disk/st_hash/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-on_disk-st_hash) +target_link_libraries(cpp-on_disk-st_hash PUBLIC + contrib-libs-cxxsupp + yutil + cpp-deprecated-mapped_file +) +target_sources(cpp-on_disk-st_hash PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/on_disk/st_hash/fake.cpp +) diff --git a/library/cpp/on_disk/st_hash/CMakeLists.linux-aarch64.txt b/library/cpp/on_disk/st_hash/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..737875ca6c --- /dev/null +++ b/library/cpp/on_disk/st_hash/CMakeLists.linux-aarch64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-on_disk-st_hash) +target_link_libraries(cpp-on_disk-st_hash PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-deprecated-mapped_file +) +target_sources(cpp-on_disk-st_hash PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/on_disk/st_hash/fake.cpp +) diff --git a/library/cpp/on_disk/st_hash/CMakeLists.linux-x86_64.txt b/library/cpp/on_disk/st_hash/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..737875ca6c --- /dev/null +++ b/library/cpp/on_disk/st_hash/CMakeLists.linux-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-on_disk-st_hash) +target_link_libraries(cpp-on_disk-st_hash PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-deprecated-mapped_file +) +target_sources(cpp-on_disk-st_hash PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/on_disk/st_hash/fake.cpp +) diff --git a/library/cpp/on_disk/st_hash/CMakeLists.txt b/library/cpp/on_disk/st_hash/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/on_disk/st_hash/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/on_disk/st_hash/CMakeLists.windows-x86_64.txt b/library/cpp/on_disk/st_hash/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..ad332fef62 --- /dev/null +++ b/library/cpp/on_disk/st_hash/CMakeLists.windows-x86_64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-on_disk-st_hash) +target_link_libraries(cpp-on_disk-st_hash PUBLIC + contrib-libs-cxxsupp + yutil + cpp-deprecated-mapped_file +) +target_sources(cpp-on_disk-st_hash PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/on_disk/st_hash/fake.cpp +) diff --git a/library/cpp/on_disk/st_hash/fake.cpp b/library/cpp/on_disk/st_hash/fake.cpp new file mode 100644 index 0000000000..ef5af4d432 --- /dev/null +++ b/library/cpp/on_disk/st_hash/fake.cpp @@ -0,0 +1,4 @@ +#include "save_stl.h" +#include "static_hash.h" +#include "static_hash_map.h" +#include "sthash_iterators.h" diff --git a/library/cpp/on_disk/st_hash/save_stl.h b/library/cpp/on_disk/st_hash/save_stl.h new file mode 100644 index 0000000000..00f8f0e20d --- /dev/null +++ b/library/cpp/on_disk/st_hash/save_stl.h @@ -0,0 +1,84 @@ +#pragma once + +#include <util/generic/hash.h> +#include <util/system/yassert.h> +#include <util/stream/output.h> + +// this structure might be replaced with sthashtable class +template <class HF, class Eq, class size_type> +struct sthashtable_nvm_sv { + sthashtable_nvm_sv() { + if (sizeof(sthashtable_nvm_sv) != sizeof(HF) + sizeof(Eq) + 3 * sizeof(size_type)) { + memset(this, 0, sizeof(sthashtable_nvm_sv)); + } + } + + sthashtable_nvm_sv(const HF& phf, const Eq& peq, const size_type& pnb, const size_type& pne, const size_type& pnd) + : sthashtable_nvm_sv() + { + hf = phf; + eq = peq; + num_buckets = pnb; + num_elements = pne; + data_end_off = pnd; + } + + HF hf; + Eq eq; + size_type num_buckets; + size_type num_elements; + size_type data_end_off; +}; + +/** + * Some hack to save both THashMap and sthash. + * Working with stHash does not depend on the template parameters, because the content of stHash is not used inside this method. 
+ */ +template <class V, class K, class HF, class Ex, class Eq, class A> +template <class KeySaver> +inline int THashTable<V, K, HF, Ex, Eq, A>::save_for_st(IOutputStream* stream, KeySaver& ks, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash) const { + Y_ASSERT(!stHash || stHash->bucket_count() == bucket_count()); + typedef sthashtable_nvm_sv<HF, Eq, typename KeySaver::TSizeType> sv_type; + sv_type sv = {this->_get_hash_fun(), this->_get_key_eq(), static_cast<typename KeySaver::TSizeType>(buckets.size()), static_cast<typename KeySaver::TSizeType>(num_elements), 0}; + // to do: m.b. use just the size of corresponding object? + typename KeySaver::TSizeType cur_off = sizeof(sv_type) + + (sv.num_buckets + 1) * sizeof(typename KeySaver::TSizeType); + sv.data_end_off = cur_off; + const_iterator n; + for (n = begin(); n != end(); ++n) { + sv.data_end_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(*n)); + } + typename KeySaver::TSizeType* sb = stHash ? 
(typename KeySaver::TSizeType*)(stHash->buckets()) : nullptr; + if (stHash) + sv.data_end_off += static_cast<typename KeySaver::TSizeType>(sb[buckets.size()] - sb[0]); + //saver.Align(sizeof(char*)); + stream->Write(&sv, sizeof(sv)); + + size_type i; + //save vector + for (i = 0; i < buckets.size(); ++i) { + node* cur = buckets[i]; + stream->Write(&cur_off, sizeof(cur_off)); + if (cur) { + while (!((uintptr_t)cur & 1)) { + cur_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(cur->val)); + cur = cur->next; + } + } + if (stHash) + cur_off += static_cast<typename KeySaver::TSizeType>(sb[i + 1] - sb[i]); + } + stream->Write(&cur_off, sizeof(cur_off)); // end mark + for (i = 0; i < buckets.size(); ++i) { + node* cur = buckets[i]; + if (cur) { + while (!((uintptr_t)cur & 1)) { + ks.SaveRecord(stream, cur->val); + cur = cur->next; + } + } + if (stHash) + stream->Write((const char*)stHash + sb[i], sb[i + 1] - sb[i]); + } + return 0; +} diff --git a/library/cpp/on_disk/st_hash/static_hash.h b/library/cpp/on_disk/st_hash/static_hash.h new file mode 100644 index 0000000000..ca7a6ccd36 --- /dev/null +++ b/library/cpp/on_disk/st_hash/static_hash.h @@ -0,0 +1,420 @@ +#pragma once + +#include "save_stl.h" +#include "sthash_iterators.h" + +#include <util/generic/hash.h> +#include <util/generic/vector.h> +#include <util/generic/buffer.h> +#include <util/generic/cast.h> +#include <util/generic/yexception.h> // for save/load only +#include <util/stream/file.h> +#include <util/stream/buffer.h> +#include <utility> + +#include <memory> +#include <algorithm> +#include <functional> + +#include <cstdlib> +#include <cstddef> + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4624) // 'destructor could not be generated because a base class destructor is inaccessible' +#endif + +template <class HashType, class KeySaver> +inline void SaveHashToStreamEx(HashType& hash, IOutputStream* stream) { + KeySaver ks; + if (hash.save_for_st(stream, ks)) + ythrow 
yexception() << "Could not save hash to stream"; +} + +template <class HashType> +inline void SaveHashToStream(HashType& hash, IOutputStream* stream) { + typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; + return SaveHashToStreamEx<HashType, KeySaver>(hash, stream); +} + +template <class HashType, class KeySaver> +inline void SaveHashToFileEx(HashType& hash, const char* fileName) { + TFileOutput output(fileName); + SaveHashToStreamEx<HashType, KeySaver>(hash, &output); +} + +template <class HashType> +inline void SaveHashToFile(HashType& hash, const char* fileName) { + typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; + return SaveHashToFileEx<HashType, KeySaver>(hash, fileName); +} + +template <class HashType> +inline void SaveHashSetToFile(HashType& hash, const char* fileName) { + typedef TSthashSetWriter<typename HashType::key_type, ui64> KeySaver; + return SaveHashToFileEx<HashType, KeySaver>(hash, fileName); +} + +template <class HashType> +inline void SaveHashToFile32(HashType& hash, const char* fileName) { + typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui32> KeySaver; + return SaveHashToFileEx<HashType, KeySaver>(hash, fileName); +} + +template <class HashType, class KeySaver> +inline void SaveHashToBufferEx(HashType& hash, TBuffer& buffer, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash = nullptr) { + TBufferOutput stream(buffer); + KeySaver ks; + if (hash.save_for_st(&stream, ks, stHash)) + ythrow yexception() << "Could not save hash to memory"; +} + +template <class HashType> +inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer) { + typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; + SaveHashToBufferEx<HashType, KeySaver>(hash, buffer); +} + +/** + * Some hack to save both THashMap and sthash. 
+ * THashMap and sthash must have same bucket_count(). + */ +template <class HashType, class StHashType> +inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer, StHashType* stHash) { + typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver; + typedef sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* SH; + + SH sh = reinterpret_cast<SH>(stHash); + SaveHashToBufferEx<HashType, KeySaver>(hash, buffer, sh); +} + +template <class HashType> +inline void SaveHashToBuffer32(HashType& hash, TBuffer& buffer) { + typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui32> KeySaver; + SaveHashToBufferEx<HashType, KeySaver>(hash, buffer); +} + +template <class Iter, typename size_type_f = ui64> +class sthashtable { +public: + typedef typename Iter::TKeyType key_type; + typedef typename Iter::TValueType value_type; + typedef typename Iter::THasherType hasher; + typedef typename Iter::TKeyEqualType key_equal; + + typedef size_type_f size_type; + typedef ptrdiff_t difference_type; + typedef const value_type* const_pointer; + typedef const value_type& const_reference; + + typedef Iter const_iterator; + + const hasher hash_funct() const { + return hash; + } + const key_equal key_eq() const { + return equals; + } + +private: + const hasher hash; + const key_equal equals; + +private: + const_iterator iter_at_bucket(size_type bucket) const { + return (const_iterator)(((char*)this + buckets()[bucket])); + } + + const_iterator iter_at_bucket_or_end(size_type bucket) const { + if (bucket < num_buckets) + return (const_iterator)(((char*)this + buckets()[bucket])); + else + return end(); + } + + const size_type num_buckets; + const size_type num_elements; + const size_type data_end_off; + +protected: //shut up gcc warning + // we can't construct/destroy this object at all! 
+ sthashtable(); + sthashtable(const sthashtable& ht); + ~sthashtable(); + +public: + // const size_type *buckets; + const size_type* buckets() const { + return (size_type*)((char*)this + sizeof(*this)); + } + const size_type buckets(size_type n) const { + return buckets()[n]; + } + + size_type size() const { + return num_elements; + } + size_type max_size() const { + return size_type(-1); + } + bool empty() const { + return size() == 0; + } + + const_iterator begin() const { + return num_buckets ? iter_at_bucket(0) : end(); + } + + const_iterator end() const { + return (const_iterator)(((char*)this + data_end_off)); + } + +public: + size_type size_in_bytes() const { + return data_end_off; + } + + size_type bucket_count() const { + return num_buckets; + } + + size_type elems_in_bucket(size_type bucket) const { + size_type result = 0; + const_iterator first = iter_at_bucket(bucket); + const_iterator last = iter_at_bucket_or_end(bucket + 1); + + for (; first != last; ++first) + ++result; + return result; + } + + template <class TheKey> + const_iterator find(const TheKey& key) const { + size_type n = bkt_num_key(key); + const_iterator first(iter_at_bucket(n)), last(iter_at_bucket_or_end(n + 1)); + for (; + first != last && !first.KeyEquals(equals, key); + ++first) { + } + if (first != last) + return first; + return end(); + } + + size_type count(const key_type& key) const { + const size_type n = bkt_num_key(key); + size_type result = 0; + const_iterator first = iter_at_bucket(n); + const_iterator last = iter_at_bucket_or_end(n + 1); + + for (; first != last; ++first) + if (first.KeyEquals(equals, key)) + ++result; + return result; + } + + std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const; + +private: + template <class TheKey> + size_type bkt_num_key(const TheKey& key) const { + return hash(key) % num_buckets; + } +}; + +template <class I, class size_type_f> +std::pair<I, I> sthashtable<I, size_type_f>::equal_range(const key_type& key) 
const { + typedef std::pair<const_iterator, const_iterator> pii; + const size_type n = bkt_num_key(key); + const_iterator first = iter_at_bucket(n); + const_iterator last = iter_at_bucket_or_end(n + 1); + + for (; first != last; ++first) { + if (first.KeyEquals(equals, key)) { + const_iterator cur = first; + ++cur; + for (; cur != last; ++cur) + if (!cur.KeyEquals(equals, key)) + return pii(const_iterator(first), + const_iterator(cur)); + return pii(const_iterator(first), + const_iterator(last)); + } + } + return pii(end(), end()); +} + +/* end __SGI_STL_HASHTABLE_H */ + +template <class Key, class T, class HashFcn /*= hash<Key>*/, + class EqualKey = TEqualTo<Key>, typename size_type_f = ui64> +class sthash { +private: + typedef sthashtable<TSthashIterator<const Key, const T, HashFcn, EqualKey>, size_type_f> ht; + ht rep; + +public: + typedef typename ht::key_type key_type; + typedef typename ht::value_type value_type; + typedef typename ht::hasher hasher; + typedef typename ht::key_equal key_equal; + typedef T mapped_type; + + typedef typename ht::size_type size_type; + typedef typename ht::difference_type difference_type; + typedef typename ht::const_pointer const_pointer; + typedef typename ht::const_reference const_reference; + + typedef typename ht::const_iterator const_iterator; + + const hasher hash_funct() const { + return rep.hash_funct(); + } + const key_equal key_eq() const { + return rep.key_eq(); + } + +public: + size_type size() const { + return rep.size(); + } + size_type max_size() const { + return rep.max_size(); + } + bool empty() const { + return rep.empty(); + } + + const_iterator begin() const { + return rep.begin(); + } + const_iterator end() const { + return rep.end(); + } + +public: + template <class TheKey> + const_iterator find(const TheKey& key) const { + return rep.find(key); + } + template <class TheKey> + bool has(const TheKey& key) const { + return rep.find(key) != rep.end(); + } + + size_type count(const key_type& key) const { + 
return rep.count(key); + } + + std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const { + return rep.equal_range(key); + } + + size_type size_in_bytes() const { + return rep.size_in_bytes(); + } + + size_type bucket_count() const { + return rep.bucket_count(); + } + size_type max_bucket_count() const { + return rep.max_bucket_count(); + } + size_type elems_in_bucket(size_type n) const { + return rep.elems_in_bucket(n); + } + + const size_type* buckets() const { + return rep.buckets(); + } + const size_type buckets(size_type n) const { + return rep.buckets()[n]; + } +}; + +template <class Key, class HashFcn, + class EqualKey = TEqualTo<Key>, typename size_type_f = ui64> +class sthash_set: public sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> { + typedef sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> Base; + +public: + using Base::const_iterator; + using Base::hasher; + using Base::key_equal; + using Base::key_type; + using Base::size_type; + using Base::value_type; +}; + +template <class Key, class T, class HashFcn /*= hash<Key>*/, + class EqualKey = TEqualTo<Key>, typename size_type_f = ui64> +class sthash_mm { +private: + typedef sthashtable<TSthashIterator<const Key, T, HashFcn, EqualKey>, size_type_f> ht; + ht rep; + +public: + typedef typename ht::key_type key_type; + typedef typename ht::value_type value_type; + typedef typename ht::hasher hasher; + typedef typename ht::key_equal key_equal; + typedef T mapped_type; + + typedef typename ht::size_type size_type; + typedef typename ht::difference_type difference_type; + typedef typename ht::const_pointer const_pointer; + typedef typename ht::const_reference const_reference; + + typedef typename ht::const_iterator const_iterator; + + const hasher hash_funct() const { + return rep.hash_funct(); + } + const key_equal key_eq() const { + return rep.key_eq(); + } + +public: + size_type size() const { + return rep.size(); + } + size_type max_size() const { + return 
rep.max_size(); + } + bool empty() const { + return rep.empty(); + } + + const_iterator begin() const { + return rep.begin(); + } + const_iterator end() const { + return rep.end(); + } + + const_iterator find(const key_type& key) const { + return rep.find(key); + } + + size_type count(const key_type& key) const { + return rep.count(key); + } + + std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const { + return rep.equal_range(key); + } + + size_type bucket_count() const { + return rep.bucket_count(); + } + size_type max_bucket_count() const { + return rep.max_bucket_count(); + } + size_type elems_in_bucket(size_type n) const { + return rep.elems_in_bucket(n); + } +}; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif diff --git a/library/cpp/on_disk/st_hash/static_hash_map.h b/library/cpp/on_disk/st_hash/static_hash_map.h new file mode 100644 index 0000000000..5dc50abd39 --- /dev/null +++ b/library/cpp/on_disk/st_hash/static_hash_map.h @@ -0,0 +1,59 @@ +#pragma once + +#include "static_hash.h" + +#include <library/cpp/deprecated/mapped_file/mapped_file.h> + +#include <util/system/filemap.h> + +template <class SH> +struct sthash_mapped_c { + typedef SH H; + typedef typename H::const_iterator const_iterator; + TMappedFile M; + H* hsh; + sthash_mapped_c() + : M() + , hsh(nullptr) + { + } + sthash_mapped_c(const char* fname, bool precharge) + : M() + , hsh(nullptr) + { + Open(fname, precharge); + } + void Open(const char* fname, bool precharge) { + M.init(fname); + if (precharge) + M.precharge(); + hsh = (H*)M.getData(); + if (M.getSize() < sizeof(H) || (ssize_t)M.getSize() != hsh->end().Data - (char*)hsh) + ythrow yexception() << "Could not map hash: " << fname << " is damaged"; + } + H* operator->() { + return hsh; + } + const H* operator->() const { + return hsh; + } + H* GetSthash() { + return hsh; + } + const H* GetSthash() const { + return hsh; + } +}; + +template <class Key, class T, class Hash> +struct sthash_mapped: public 
sthash_mapped_c<sthash<Key, T, Hash>> { + typedef sthash<Key, T, Hash> H; + sthash_mapped(const char* fname, bool precharge) + : sthash_mapped_c<H>(fname, precharge) + { + } + sthash_mapped() + : sthash_mapped_c<H>() + { + } +}; diff --git a/library/cpp/on_disk/st_hash/sthash_iterators.h b/library/cpp/on_disk/st_hash/sthash_iterators.h new file mode 100644 index 0000000000..6a9ebdd6c3 --- /dev/null +++ b/library/cpp/on_disk/st_hash/sthash_iterators.h @@ -0,0 +1,334 @@ +#pragma once + +#include "save_stl.h" + +#include <util/system/align.h> + +/** + This file provides functionality for saving some relatively simple THashMap object + to disk in a form that can be mapped read-only (via mmap) at any address. + That saved object is accessed via pointer to sthash object (that must have + the same parameters as original THashMap object) + + If either key or value are variable-sized (i.e. contain pointers), user must + write his own instantiation of TSthashIterator (read iterator for sthash) and + TSthashWriter (write iterator for THashMap). + An example for <const char *, B> pair is in here. 
+**/ + +// TEmptyValue and SizeOfEx are helpers for sthash_set +struct TEmptyValue { + TEmptyValue() = default; +}; + +template <class T> +inline size_t SizeOfEx() { + return sizeof(T); +} + +template <> +inline size_t SizeOfEx<TEmptyValue>() { + return 0; +} +template <> +inline size_t SizeOfEx<const TEmptyValue>() { + return 0; +} + +template <class TKey, class TValue, class HashFcn, class EqualKey> +struct TSthashIterator { + // Implementation for simple types + typedef const TKey TKeyType; + typedef const TValue TValueType; + typedef EqualKey TKeyEqualType; + typedef HashFcn THasherType; + + const char* Data; + TSthashIterator() + : Data(nullptr) + { + } + explicit TSthashIterator(const char* data) + : Data(data) + { + } + void operator++() { + Data += GetLength(); + } + + bool operator!=(const TSthashIterator& that) const { + return Data != that.Data; + } + bool operator==(const TSthashIterator& that) const { + return Data == that.Data; + } + TKey& Key() const { + return *(TKey*)Data; + } + TValue& Value() { + return *(TValue*)(Data + sizeof(TKey)); + } + const TValue& Value() const { + return *(const TValue*)(Data + sizeof(TKey)); + } + + template <class AnotherKeyType> + bool KeyEquals(const EqualKey& eq, const AnotherKeyType& key) const { + return eq(*(TKey*)Data, key); + } + + size_t GetLength() const { + return sizeof(TKey) + SizeOfEx<TValue>(); + } +}; + +template <class Key, class Value, typename size_type_o = ui64> +struct TSthashWriter { + typedef size_type_o TSizeType; + size_t GetRecordSize(const std::pair<const Key, const Value>&) const { + return sizeof(Key) + SizeOfEx<Value>(); + } + int SaveRecord(IOutputStream* stream, const std::pair<const Key, const Value>& record) const { + stream->Write(&record.first, sizeof(Key)); + stream->Write(&record.second, SizeOfEx<Value>()); + return 0; + } +}; + +// Remember that this simplified implementation makes a copy of `key' in std::make_pair. +// It can also waste some memory on undesired alignment. 
+template <class Key, typename size_type_o = ui64> +struct TSthashSetWriter: public TSthashWriter<Key, TEmptyValue, size_type_o> { + typedef TSthashWriter<Key, TEmptyValue, size_type_o> MapWriter; + size_t GetRecordSize(const Key& key) const { + return MapWriter::GetRecordSize(std::make_pair(key, TEmptyValue())); + } + int SaveRecord(IOutputStream* stream, const Key& key) const { + return MapWriter::SaveRecord(stream, std::make_pair(key, TEmptyValue())); + } +}; + +// we can't save something with pointers without additional tricks + +template <class A, class B, class HashFcn, class EqualKey> +struct TSthashIterator<A*, B, HashFcn, EqualKey> {}; + +template <class A, class B, class HashFcn, class EqualKey> +struct TSthashIterator<A, B*, HashFcn, EqualKey> {}; + +template <class A, class B, typename size_type_o> +struct TSthashWriter<A*, B*, size_type_o> {}; + +template <class A, class B, typename size_type_o> +struct TSthashWriter<A*, B, size_type_o> {}; + +template <class A, class B, typename size_type_o> +struct TSthashWriter<A, B*, size_type_o> {}; + +template <class T> +inline size_t AlignForChrKey() { + return 4; // TODO: change this (requeres rebuilt of a few existing files) +} + +template <> +inline size_t AlignForChrKey<TEmptyValue>() { + return 1; +} + +template <> +inline size_t AlignForChrKey<const TEmptyValue>() { + return AlignForChrKey<TEmptyValue>(); +} + +// !! 
note that for char*, physical placement of key and value is swapped +template <class TValue, class HashFcn, class EqualKey> +struct TSthashIterator<const char* const, TValue, HashFcn, EqualKey> { + typedef const TValue TValueType; + typedef const char* TKeyType; + typedef EqualKey TKeyEqualType; + typedef HashFcn THasherType; + + const char* Data; + TSthashIterator() + : Data(nullptr) + { + } + TSthashIterator(const char* data) + : Data(data) + { + } + void operator++() { + Data += GetLength(); + } + + bool operator!=(const TSthashIterator& that) const { + return Data != that.Data; + } + bool operator==(const TSthashIterator& that) const { + return Data == that.Data; + } + const char* Key() const { + return Data + SizeOfEx<TValue>(); + } + TValue& Value() { + return *(TValue*)Data; + } + const TValue& Value() const { + return *(const TValue*)Data; + } + + template <class K> + bool KeyEquals(const EqualKey& eq, const K& k) const { + return eq(Data + SizeOfEx<TValue>(), k); + } + + size_t GetLength() const { + size_t length = strlen(Data + SizeOfEx<TValue>()) + 1 + SizeOfEx<TValue>(); + length = AlignUp(length, AlignForChrKey<TValue>()); + return length; + } +}; + +template <class Value, typename size_type_o> +struct TSthashWriter<const char*, Value, size_type_o> { + typedef size_type_o TSizeType; + size_t GetRecordSize(const std::pair<const char*, const Value>& record) const { + size_t length = strlen(record.first) + 1 + SizeOfEx<Value>(); + length = AlignUp(length, AlignForChrKey<Value>()); + return length; + } + int SaveRecord(IOutputStream* stream, const std::pair<const char*, const Value>& record) const { + const char* alignBuffer = "qqqq"; + stream->Write(&record.second, SizeOfEx<Value>()); + size_t length = strlen(record.first) + 1; + stream->Write(record.first, length); + length = AlignUpSpace(length, AlignForChrKey<Value>()); + if (length) + stream->Write(alignBuffer, length); + return 0; + } +}; + +template <class TKey, class HashFcn, class EqualKey> 
+struct TSthashIterator<TKey, const char* const, HashFcn, EqualKey> { + typedef const TKey TKeyType; + typedef const char* TValueType; + typedef EqualKey TKeyEqualType; + typedef HashFcn THasherType; + + const char* Data; + TSthashIterator() + : Data(nullptr) + { + } + TSthashIterator(const char* data) + : Data(data) + { + } + void operator++() { + Data += GetLength(); + } + + bool operator!=(const TSthashIterator& that) const { + return Data != that.Data; + } + bool operator==(const TSthashIterator& that) const { + return Data == that.Data; + } + TKey& Key() { + return *(TKey*)Data; + } + const char* Value() const { + return Data + sizeof(TKey); + } + + template <class K> + bool KeyEquals(const EqualKey& eq, const K& k) const { + return eq(*(TKey*)Data, k); + } + + size_t GetLength() const { + size_t length = strlen(Data + sizeof(TKey)) + 1 + sizeof(TKey); + length = AlignUp(length, (size_t)4); + return length; + } +}; + +template <class Key, typename size_type_o> +struct TSthashWriter<Key, const char*, size_type_o> { + typedef size_type_o TSizeType; + size_t GetRecordSize(const std::pair<const Key, const char*>& record) const { + size_t length = strlen(record.second) + 1 + sizeof(Key); + length = AlignUp(length, (size_t)4); + return length; + } + int SaveRecord(IOutputStream* stream, const std::pair<const Key, const char*>& record) const { + const char* alignBuffer = "qqqq"; + stream->Write(&record.first, sizeof(Key)); + size_t length = strlen(record.second) + 1; + stream->Write(record.second, length); + length = AlignUpSpace(length, (size_t)4); + if (length) + stream->Write(alignBuffer, length); + return 0; + } +}; + +template <class HashFcn, class EqualKey> +struct TSthashIterator<const char* const, const char* const, HashFcn, EqualKey> { + typedef const char* TKeyType; + typedef const char* TValueType; + typedef EqualKey TKeyEqualType; + typedef HashFcn THasherType; + + const char* Data; + TSthashIterator() + : Data(nullptr) + { + } + TSthashIterator(const 
char* data) + : Data(data) + { + } + void operator++() { + Data += GetLength(); + } + + bool operator!=(const TSthashIterator& that) const { + return Data != that.Data; + } + bool operator==(const TSthashIterator& that) const { + return Data == that.Data; + } + const char* Key() const { + return Data; + } + const char* Value() const { + return Data + strlen(Data) + 1; + } + + template <class K> + bool KeyEquals(const EqualKey& eq, const K& k) const { + return eq(Data, k); + } + + size_t GetLength() const { + size_t length = strlen(Data) + 1; + length += strlen(Data + length) + 1; + return length; + } +}; + +template <typename size_type_o> +struct TSthashWriter<const char*, const char*, size_type_o> { + typedef size_type_o TSizeType; + size_t GetRecordSize(const std::pair<const char*, const char*>& record) const { + size_t size = strlen(record.first) + strlen(record.second) + 2; + return size; + } + int SaveRecord(IOutputStream* stream, const std::pair<const char*, const char*>& record) const { + stream->Write(record.first, strlen(record.first) + 1); + stream->Write(record.second, strlen(record.second) + 1); + return 0; + } +}; diff --git a/library/cpp/on_disk/st_hash/ya.make b/library/cpp/on_disk/st_hash/ya.make new file mode 100644 index 0000000000..8c6d05711c --- /dev/null +++ b/library/cpp/on_disk/st_hash/ya.make @@ -0,0 +1,15 @@ +LIBRARY() + +SRCS( + fake.cpp + save_stl.h + static_hash.h + static_hash_map.h + sthash_iterators.h +) + +PEERDIR( + library/cpp/deprecated/mapped_file +) + +END() diff --git a/library/cpp/regex/CMakeLists.darwin-x86_64.txt b/library/cpp/regex/CMakeLists.darwin-x86_64.txt index 6e2a4fabcd..877d40538b 100644 --- a/library/cpp/regex/CMakeLists.darwin-x86_64.txt +++ b/library/cpp/regex/CMakeLists.darwin-x86_64.txt @@ -6,6 +6,7 @@ # original buildsystem will not be accepted. 
+add_subdirectory(glob) add_subdirectory(hyperscan) add_subdirectory(pcre) add_subdirectory(pire) diff --git a/library/cpp/regex/CMakeLists.linux-aarch64.txt b/library/cpp/regex/CMakeLists.linux-aarch64.txt index 279390306b..84c257a819 100644 --- a/library/cpp/regex/CMakeLists.linux-aarch64.txt +++ b/library/cpp/regex/CMakeLists.linux-aarch64.txt @@ -6,5 +6,6 @@ # original buildsystem will not be accepted. +add_subdirectory(glob) add_subdirectory(pcre) add_subdirectory(pire) diff --git a/library/cpp/regex/CMakeLists.linux-x86_64.txt b/library/cpp/regex/CMakeLists.linux-x86_64.txt index 6e2a4fabcd..877d40538b 100644 --- a/library/cpp/regex/CMakeLists.linux-x86_64.txt +++ b/library/cpp/regex/CMakeLists.linux-x86_64.txt @@ -6,6 +6,7 @@ # original buildsystem will not be accepted. +add_subdirectory(glob) add_subdirectory(hyperscan) add_subdirectory(pcre) add_subdirectory(pire) diff --git a/library/cpp/regex/CMakeLists.windows-x86_64.txt b/library/cpp/regex/CMakeLists.windows-x86_64.txt index 6e2a4fabcd..877d40538b 100644 --- a/library/cpp/regex/CMakeLists.windows-x86_64.txt +++ b/library/cpp/regex/CMakeLists.windows-x86_64.txt @@ -6,6 +6,7 @@ # original buildsystem will not be accepted. +add_subdirectory(glob) add_subdirectory(hyperscan) add_subdirectory(pcre) add_subdirectory(pire) diff --git a/library/cpp/regex/glob/CMakeLists.darwin-x86_64.txt b/library/cpp/regex/glob/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..ca8383e355 --- /dev/null +++ b/library/cpp/regex/glob/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-regex-glob) +target_link_libraries(cpp-regex-glob PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-charset +) +target_sources(cpp-regex-glob PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp +) diff --git a/library/cpp/regex/glob/CMakeLists.linux-aarch64.txt b/library/cpp/regex/glob/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..3953937c6d --- /dev/null +++ b/library/cpp/regex/glob/CMakeLists.linux-aarch64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-regex-glob) +target_link_libraries(cpp-regex-glob PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-charset +) +target_sources(cpp-regex-glob PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp +) diff --git a/library/cpp/regex/glob/CMakeLists.linux-x86_64.txt b/library/cpp/regex/glob/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..3953937c6d --- /dev/null +++ b/library/cpp/regex/glob/CMakeLists.linux-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-regex-glob) +target_link_libraries(cpp-regex-glob PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-charset +) +target_sources(cpp-regex-glob PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp +) diff --git a/library/cpp/regex/glob/CMakeLists.txt b/library/cpp/regex/glob/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/regex/glob/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/regex/glob/CMakeLists.windows-x86_64.txt b/library/cpp/regex/glob/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..ca8383e355 --- /dev/null +++ b/library/cpp/regex/glob/CMakeLists.windows-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-regex-glob) +target_link_libraries(cpp-regex-glob PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-charset +) +target_sources(cpp-regex-glob PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/regex/glob/glob_iterator.cpp +) diff --git a/library/cpp/regex/glob/glob.cpp b/library/cpp/regex/glob/glob.cpp new file mode 100644 index 0000000000..9da058122a --- /dev/null +++ b/library/cpp/regex/glob/glob.cpp @@ -0,0 +1,921 @@ +#define FROM_IMPLEMENTATION +#include "glob_compat.h" + +#if defined(USE_INTERNAL_GLOB) +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Guido van Rossum. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <library/cpp/charset/ci_string.h> +#include <util/system/compat.h> +#include <util/folder/dirut.h> + +/* + * glob(3) -- a superset of the one defined in POSIX 1003.2. + * + * The [!...] convention to negate a range is supported (SysV, Posix, ksh). + * + * Optional extra services, controlled by flags not defined by POSIX: + * + * GLOB_QUOTE: + * Escaping convention: \ inhibits any special meaning the following + * character might have (except \ at end of string is retained). + * GLOB_MAGCHAR: + * Set in gl_flags if pattern contained a globbing character. + * GLOB_NOMAGIC: + * Same as GLOB_NOCHECK, but it will only append pattern if it did + * not contain any magic characters. [Used in csh style globbing] + * GLOB_ALTDIRFUNC: + * Use alternately specified directory access functions. + * GLOB_TILDE: + * expand ~user/foo to the /home/dir/of/user/foo + * GLOB_BRACE: + * expand {1,2}{a,b} to 1a 1b 2a 2b + * gl_matchc: + * Number of matches in the current invocation of glob. + */ + +/* + * Some notes on multibyte character support: + * 1. Patterns with illegal byte sequences match nothing - even if + * GLOB_NOCHECK is specified. + * 2. 
Illegal byte sequences in filenames are handled by treating them as + * single-byte characters with a value of the first byte of the sequence + * cast to wchar_t. + * 3. State-dependent encodings are not currently supported. + */ + +//#include <sys/param.h> +#include <sys/stat.h> + +#include <ctype.h> +//#include <dirent.h> +#include <errno.h> +#include <limits.h> +//#include <pwd.h> +//#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#if defined(_unix_) +#include <unistd.h> +#endif +#include <wchar.h> + +#if !defined(_unix_) +// silly replacement for compilation +using uint_fast64_t = ui64; +using u_int = unsigned int; +using u_char = unsigned char; +#define ARG_MAX 256 +#define S_ISDIR(x) ((x) & _S_IFDIR) +#define S_ISLNK(x) 0 +#define lstat stat +inline bool issetugid() { return false; } +inline char *getlogin() { return 0; } +inline int getuid() { return 0; } +struct passwd { + char *pw_dir; +}; +inline passwd *getpwuid(int) { return 0; } +inline passwd *getpwnam(char *) { return 0; } +#endif + +#define __collate_load_error 1 +inline int __collate_range_cmp(int, int) { return 0; } +#undef COMMA // was defined in stroka.h +// end silly replacement + +//#include "collate.h" + +#define DOLLAR '$' +#define DOT '.' +#define EOS '\0' +#define LBRACKET '[' +#define NOT '!' +#define QUESTION '?' 
+#define QUOTE '\\' +#define RANGE '-' +#define RBRACKET ']' +#define SEP '/' +#define STAR '*' +#define TILDE '~' +#define UNDERSCORE '_' +#define LBRACE '{' +#define RBRACE '}' +#define SLASH '/' +#define COMMA ',' + +#ifndef DEBUG + +#define M_QUOTE 0x8000000000ULL +#define M_PROTECT 0x4000000000ULL +#define M_MASK 0xffffffffffULL +#define M_CHAR 0x00ffffffffULL + +using Char = uint_fast64_t; + +#else + +#define M_QUOTE 0x80 +#define M_PROTECT 0x40 +#define M_MASK 0xff +#define M_CHAR 0x7f + +using Char = char; + +#endif + + +#define CHAR(c) ((Char)((c)&M_CHAR)) +#define META(c) ((Char)((c)|M_QUOTE)) +#define M_ALL META('*') +#define M_END META(']') +#define M_NOT META('!') +#define M_ONE META('?') +#define M_RNG META('-') +#define M_SET META('[') +#define ismeta(c) (((c)&M_QUOTE) != 0) + + +static int compare(const void *, const void *); +static int g_Ctoc(const Char *, char *, u_int); +static int g_lstat(Char *, struct stat *, glob_t *); +static DIR *g_opendir(Char *, glob_t *); +static Char *g_strchr(Char *, wchar_t); +#ifdef notdef +static Char *g_strcat(Char *, const Char *); +#endif +static int glob0(const Char *, glob_t *, int *); +static int glob1(Char *, glob_t *, int *); +static int glob2(Char *, Char *, Char *, Char *, glob_t *, int *); +static int glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, int *); +static int globextend(const Char *, glob_t *, int *); +static const Char * + globtilde(const Char *, Char *, size_t, glob_t *); +static int globexp1(const Char *, glob_t *, int *); +static int globexp2(const Char *, const Char *, glob_t *, int *, int *); +static int match(Char *, Char *, Char *); +#ifdef DEBUG +static void qprintf(const char *, Char *); +#endif + +int +glob(const char *pattern, int flags, int (*errfunc)(const char *, int), glob_t *pglob) +{ + const u_char *patnext; + int limit; + Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot; + mbstate_t mbs; + wchar_t wc; + size_t clen; + + patnext = (u_char *) pattern; + if (!(flags & 
GLOB_APPEND)) { + pglob->gl_pathc = 0; + pglob->gl_pathv = NULL; + if (!(flags & GLOB_DOOFFS)) + pglob->gl_offs = 0; + } + if (flags & GLOB_LIMIT) { + limit = pglob->gl_matchc; + if (limit == 0) + limit = ARG_MAX; + } else + limit = 0; + pglob->gl_flags = flags & ~GLOB_MAGCHAR; + pglob->gl_errfunc = errfunc; + pglob->gl_matchc = 0; + + bufnext = patbuf; + bufend = bufnext + MAXPATHLEN - 1; + if (flags & GLOB_NOESCAPE) { + memset(&mbs, 0, sizeof(mbs)); + while (bufend - bufnext >= MB_CUR_MAX) { + clen = mbrtowc(&wc, (const char*)patnext, MB_LEN_MAX, &mbs); + if (clen == (size_t)-1 || clen == (size_t)-2) + return (GLOB_NOMATCH); + else if (clen == 0) + break; + *bufnext++ = wc; + patnext += clen; + } + } else { + /* Protect the quoted characters. */ + memset(&mbs, 0, sizeof(mbs)); + while (bufend - bufnext >= MB_CUR_MAX) { + if (*patnext == QUOTE) { + if (*++patnext == EOS) { + *bufnext++ = QUOTE | M_PROTECT; + continue; + } + prot = M_PROTECT; + } else + prot = 0; + clen = mbrtowc(&wc, (const char*)patnext, MB_LEN_MAX, &mbs); + if (clen == (size_t)-1 || clen == (size_t)-2) + return (GLOB_NOMATCH); + else if (clen == 0) + break; + *bufnext++ = wc | prot; + patnext += clen; + } + } + *bufnext = EOS; + + if (flags & GLOB_BRACE) + return globexp1(patbuf, pglob, &limit); + else + return glob0(patbuf, pglob, &limit); +} + +/* + * Expand recursively a glob {} pattern. 
When there is no more expansion + * invoke the standard globbing routine to glob the rest of the magic + * characters + */ +static int +globexp1(const Char *pattern, glob_t *pglob, int *limit) +{ + const Char* ptr = pattern; + int rv; + + /* Protect a single {}, for find(1), like csh */ + if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS) + return glob0(pattern, pglob, limit); + + while ((ptr = (const Char *) g_strchr((Char *) ptr, LBRACE)) != NULL) + if (!globexp2(ptr, pattern, pglob, &rv, limit)) + return rv; + + return glob0(pattern, pglob, limit); +} + + +/* + * Recursive brace globbing helper. Tries to expand a single brace. + * If it succeeds then it invokes globexp1 with the new pattern. + * If it fails then it tries to glob the rest of the pattern and returns. + */ +static int +globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv, int *limit) +{ + int i; + Char *lm, *ls; + const Char *pe, *pm, *pm1, *pl; + Char patbuf[MAXPATHLEN]; + + /* copy part up to the brace */ + for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++) + continue; + *lm = EOS; + ls = lm; + + /* Find the balanced brace */ + for (i = 0, pe = ++ptr; *pe; pe++) + if (*pe == LBRACKET) { + /* Ignore everything between [] */ + for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++) + continue; + if (*pe == EOS) { + /* + * We could not find a matching RBRACKET. + * Ignore and just look for RBRACE + */ + pe = pm; + } + } + else if (*pe == LBRACE) + i++; + else if (*pe == RBRACE) { + if (i == 0) + break; + i--; + } + + /* Non matching braces; just glob the pattern */ + if (i != 0 || *pe == EOS) { + *rv = glob0(patbuf, pglob, limit); + return 0; + } + + for (i = 0, pl = pm = ptr; pm <= pe; pm++) + switch (*pm) { + case LBRACKET: + /* Ignore everything between [] */ + for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++) + continue; + if (*pm == EOS) { + /* + * We could not find a matching RBRACKET. 
+ * Ignore and just look for RBRACE + */ + pm = pm1; + } + break; + + case LBRACE: + i++; + break; + + case RBRACE: + if (i) { + i--; + break; + } + [[fallthrough]]; + case COMMA: + if (i && *pm == COMMA) + break; + else { + /* Append the current string */ + for (lm = ls; (pl < pm); *lm++ = *pl++) + continue; + /* + * Append the rest of the pattern after the + * closing brace + */ + for (pl = pe + 1; (*lm++ = *pl++) != EOS;) + continue; + + /* Expand the current pattern */ +#ifdef DEBUG + qprintf("globexp2:", patbuf); +#endif + *rv = globexp1(patbuf, pglob, limit); + + /* move after the comma, to the next string */ + pl = pm + 1; + } + break; + + default: + break; + } + *rv = 0; + return 0; +} + + + +/* + * expand tilde from the passwd file. + */ +static const Char * +globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob) +{ + struct passwd *pwd; + char *h; + const Char *p; + Char *b, *eb; + + if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE)) + return pattern; + + /* + * Copy up to the end of the string or / + */ + eb = &patbuf[patbuf_len - 1]; + for (p = pattern + 1, h = (char *) patbuf; + h < (char *)eb && *p && *p != SLASH; *h++ = (char)*p++) + continue; + + *h = EOS; + + if (((char *) patbuf)[0] == EOS) { + /* + * handle a plain ~ or ~/ by expanding $HOME first (iff + * we're not running setuid or setgid) and then trying + * the password file + */ + if (issetugid() != 0 || + (h = ::getenv("HOME")) == NULL) { + if (((h = getlogin()) != NULL && + (pwd = getpwnam(h)) != NULL) || + (pwd = getpwuid(getuid())) != NULL) + h = pwd->pw_dir; + else + return pattern; + } + } + else { + /* + * Expand a ~user + */ + if ((pwd = getpwnam((char*) patbuf)) == NULL) + return pattern; + else + h = pwd->pw_dir; + } + + /* Copy the home directory */ + for (b = patbuf; b < eb && *h; *b++ = *h++) + continue; + + /* Append the rest of the pattern */ + while (b < eb && (*b++ = *p++) != EOS) + continue; + *b = EOS; + + return patbuf; +} + + +/* + * The 
main glob() routine: compiles the pattern (optionally processing + * quotes), calls glob1() to do the real pattern matching, and finally + * sorts the list (unless unsorted operation is requested). Returns 0 + * if things went well, nonzero if errors occurred. + */ +static int +glob0(const Char *pattern, glob_t *pglob, int *limit) +{ + const Char *qpatnext; + int c, err, oldpathc; + Char *bufnext, patbuf[MAXPATHLEN]; + + qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob); + oldpathc = pglob->gl_pathc; + bufnext = patbuf; + + /* We don't need to check for buffer overflow any more. */ + while ((c = (char)*qpatnext++) != EOS) { + switch (c) { + case LBRACKET: + c = (char)*qpatnext; + if (c == NOT) + ++qpatnext; + if (*qpatnext == EOS || + g_strchr((Char *) qpatnext+1, RBRACKET) == NULL) { + *bufnext++ = LBRACKET; + if (c == NOT) + --qpatnext; + break; + } + *bufnext++ = M_SET; + if (c == NOT) + *bufnext++ = M_NOT; + c = (char)*qpatnext++; + do { + *bufnext++ = CHAR(c); + if (*qpatnext == RANGE && + (c = (char)qpatnext[1]) != RBRACKET) { + *bufnext++ = M_RNG; + *bufnext++ = CHAR(c); + qpatnext += 2; + } + } while ((c = (char)*qpatnext++) != RBRACKET); + pglob->gl_flags |= GLOB_MAGCHAR; + *bufnext++ = M_END; + break; + case QUESTION: + pglob->gl_flags |= GLOB_MAGCHAR; + *bufnext++ = M_ONE; + break; + case STAR: + pglob->gl_flags |= GLOB_MAGCHAR; + /* collapse adjacent stars to one, + * to avoid exponential behavior + */ + if (bufnext == patbuf || bufnext[-1] != M_ALL) + *bufnext++ = M_ALL; + break; + default: + *bufnext++ = CHAR(c); + break; + } + } + *bufnext = EOS; +#ifdef DEBUG + qprintf("glob0:", patbuf); +#endif + + if ((err = glob1(patbuf, pglob, limit)) != 0) + return(err); + + /* + * If there was no match we are going to append the pattern + * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified + * and the pattern did not contain any magic characters + * GLOB_NOMAGIC is there just for compatibility with csh. 
+ */ + if (pglob->gl_pathc == oldpathc) { + if (((pglob->gl_flags & GLOB_NOCHECK) || + ((pglob->gl_flags & GLOB_NOMAGIC) && + !(pglob->gl_flags & GLOB_MAGCHAR)))) + return(globextend(pattern, pglob, limit)); + else + return(GLOB_NOMATCH); + } + if (!(pglob->gl_flags & GLOB_NOSORT)) + qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, + pglob->gl_pathc - oldpathc, sizeof(char *), compare); + return(0); +} + +static int +compare(const void *p, const void *q) +{ + return(strcmp(*(char **)p, *(char **)q)); +} + +static int +glob1(Char *pattern, glob_t *pglob, int *limit) +{ + Char pathbuf[MAXPATHLEN]; + + /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ + if (*pattern == EOS) + return(0); + return(glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1, + pattern, pglob, limit)); +} + +/* + * The functions glob2 and glob3 are mutually recursive; there is one level + * of recursion for each segment in the pattern that contains one or more + * meta characters. + */ +static int +glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, glob_t *pglob, int *limit) +{ + struct stat sb; + Char *p, *q; + int anymeta; + + /* + * Loop over pattern segments until end of pattern or until + * segment with meta character found. + */ + for (anymeta = 0;;) { + if (*pattern == EOS) { /* End of pattern? */ + *pathend = EOS; + if (g_lstat(pathbuf, &sb, pglob)) + return(0); + + if (((pglob->gl_flags & GLOB_MARK) && + pathend[-1] != SEP) && (S_ISDIR(sb.st_mode))) { + if (pathend + 1 > pathend_last) + return (GLOB_ABORTED); + *pathend++ = SEP; + *pathend = EOS; + } + ++pglob->gl_matchc; + return(globextend(pathbuf, pglob, limit)); + } + + /* Find end of next segment, copy tentatively to pathend. */ + q = pathend; + p = pattern; + while (*p != EOS && *p != SEP) { + if (ismeta(*p)) + anymeta = 1; + if (q + 1 > pathend_last) + return (GLOB_ABORTED); + *q++ = *p++; + } + + if (!anymeta) { /* No expansion, do next segment. 
*/ + pathend = q; + pattern = p; + while (*pattern == SEP) { + if (pathend + 1 > pathend_last) + return (GLOB_ABORTED); + *pathend++ = *pattern++; + } + } else /* Need expansion, recurse. */ + return(glob3(pathbuf, pathend, pathend_last, pattern, p, + pglob, limit)); + } + /* NOTREACHED */ +} + +static int +glob3(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, Char *restpattern, glob_t *pglob, int *limit) +{ + struct dirent *dp; + DIR *dirp; + int err; + char buf[MAXPATHLEN]; + + /* + * The readdirfunc declaration can't be prototyped, because it is + * assigned, below, to two functions which are prototyped in glob.h + * and dirent.h as taking pointers to differently typed opaque + * structures. + */ + typedef struct dirent *(*readdirfunc_t)(void*); + readdirfunc_t readdirfunc; + + if (pathend > pathend_last) + return (GLOB_ABORTED); + *pathend = EOS; + errno = 0; + + if ((dirp = g_opendir(pathbuf, pglob)) == NULL) { + /* TODO: don't call for ENOENT or ENOTDIR? */ + if (pglob->gl_errfunc) { + if (g_Ctoc(pathbuf, buf, sizeof(buf))) + return (GLOB_ABORTED); + if (pglob->gl_errfunc(buf, errno) || + pglob->gl_flags & GLOB_ERR) + return (GLOB_ABORTED); + } + return(0); + } + + err = 0; + + /* Search directory for matching names. */ + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + readdirfunc = pglob->gl_readdir; + else + readdirfunc = (readdirfunc_t)readdir; + while ((dp = (*readdirfunc)(dirp))) { + u_char *sc; + Char *dc; + wchar_t wc; + size_t clen; + mbstate_t mbs; + + /* Initial DOT must be matched literally. 
*/ + if (dp->d_name[0] == DOT && *pattern != DOT) + continue; + memset(&mbs, 0, sizeof(mbs)); + dc = pathend; + sc = (u_char *) dp->d_name; + while (dc < pathend_last) { + clen = mbrtowc(&wc, (const char*)sc, MB_LEN_MAX, &mbs); + if (clen == (size_t)-1 || clen == (size_t)-2) { + wc = *sc; + clen = 1; + memset(&mbs, 0, sizeof(mbs)); + } + if ((*dc++ = wc) == EOS) + break; + sc += clen; + } + if (!match(pathend, pattern, restpattern)) { + *pathend = EOS; + continue; + } + err = glob2(pathbuf, --dc, pathend_last, restpattern, + pglob, limit); + if (err) + break; + } + + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + (*pglob->gl_closedir)(dirp); + else + closedir(dirp); + return(err); +} + + +/* + * Extend the gl_pathv member of a glob_t structure to accommodate a new item, + * add the new item, and update gl_pathc. + * + * This assumes the BSD realloc, which only copies the block when its size + * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic + * behavior. + * + * Return 0 if new item added, error code if memory couldn't be allocated. + * + * Invariant of the glob_t structure: + * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and + * gl_pathv points to (gl_offs + gl_pathc + 1) items. + */ +static int +globextend(const Char *path, glob_t *pglob, int *limit) +{ + char **pathv; + int i; + size_t newsize, len; + char *copy; + const Char *p; + + if (*limit && pglob->gl_pathc > *limit) { + errno = 0; + return (GLOB_NOSPACE); + } + + newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); + pathv = pglob->gl_pathv ? 
+ (char**)realloc((char *)pglob->gl_pathv, newsize) : + (char**)malloc(newsize); + if (pathv == NULL) { + if (pglob->gl_pathv) { + free(pglob->gl_pathv); + pglob->gl_pathv = NULL; + } + return(GLOB_NOSPACE); + } + + if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { + /* first time around -- clear initial gl_offs items */ + pathv += pglob->gl_offs; + for (i = pglob->gl_offs; --i >= 0; ) + *--pathv = NULL; + } + pglob->gl_pathv = pathv; + + for (p = path; *p++;) + continue; + len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */ + if ((copy = (char*)malloc(len)) != NULL) { + if (g_Ctoc(path, copy, (u_int)len)) { + free(copy); + return (GLOB_NOSPACE); + } + pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; + } + pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; + return(copy == NULL ? GLOB_NOSPACE : 0); +} + +/* + * pattern matching function for filenames. Each occurrence of the * + * pattern causes a recursion level. + */ +static int +match(Char *name, Char *pat, Char *patend) +{ + int ok, negate_range; + Char c, k; + + while (pat < patend) { + c = *pat++; + switch (c & M_MASK) { + case M_ALL: + if (pat == patend) + return(1); + do + if (match(name, pat, patend)) + return(1); + while (*name++ != EOS); + return(0); + case M_ONE: + if (*name++ == EOS) + return(0); + break; + case M_SET: + ok = 0; + if ((k = *name++) == EOS) + return(0); + if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS) + ++pat; + while (((c = *pat++) & M_MASK) != M_END) + if ((*pat & M_MASK) == M_RNG) { + if (__collate_load_error ? + CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]) : + __collate_range_cmp((int)CHAR(c), (int)CHAR(k)) <= 0 + && __collate_range_cmp((int)CHAR(k), (int)CHAR(pat[1])) <= 0 + ) + ok = 1; + pat += 2; + } else if (c == k) + ok = 1; + if (ok == negate_range) + return(0); + break; + default: + if (*name++ != c) + return(0); + break; + } + } + return(*name == EOS); +} + +/* Free allocated data belonging to a glob_t structure. 
*/ +void +globfree(glob_t *pglob) +{ + int i; + char **pp; + + if (pglob->gl_pathv != NULL) { + pp = pglob->gl_pathv + pglob->gl_offs; + for (i = pglob->gl_pathc; i--; ++pp) + if (*pp) + free(*pp); + free(pglob->gl_pathv); + pglob->gl_pathv = NULL; + } +} + +static DIR * +g_opendir(Char *str, glob_t *pglob) +{ + char buf[MAXPATHLEN]; + + if (!*str) + strcpy(buf, "."); + else { + if (g_Ctoc(str, buf, sizeof(buf))) + return (NULL); + } + + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + return (DIR*)((*pglob->gl_opendir)(buf)); + + return(opendir(buf)); +} + +static int +g_lstat(Char *fn, struct stat *sb, glob_t *pglob) +{ + char buf[MAXPATHLEN]; + + if (g_Ctoc(fn, buf, sizeof(buf))) { + errno = ENAMETOOLONG; + return (-1); + } + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + return((*pglob->gl_lstat)(buf, sb)); + return(lstat(buf, sb)); +} + +static Char * +g_strchr(Char *str, wchar_t ch) +{ + do { + if (*str == ch) + return (str); + } while (*str++); + return (NULL); +} + +static int +g_Ctoc(const Char *str, char *buf, u_int len) +{ + mbstate_t mbs; + size_t clen; + + memset(&mbs, 0, sizeof(mbs)); + while ((int)len >= MB_CUR_MAX) { + clen = wcrtomb(buf, (wchar_t)*str, &mbs); + if (clen == (size_t)-1) + return (1); + if (*str == L'\0') + return (0); + str++; + buf += clen; + len -= (u_int)clen; + } + return (1); +} + +#ifdef DEBUG +static void +qprintf(const char *str, Char *s) +{ + Char *p; + + (void)printf("%s:\n", str); + for (p = s; *p; p++) + (void)printf("%c", CHAR(*p)); + (void)printf("\n"); + for (p = s; *p; p++) + (void)printf("%c", *p & M_PROTECT ? '"' : ' '); + (void)printf("\n"); + for (p = s; *p; p++) + (void)printf("%c", ismeta(*p) ? 
'_' : ' '); + (void)printf("\n"); +} +#endif +#endif diff --git a/library/cpp/regex/glob/glob_compat.h b/library/cpp/regex/glob/glob_compat.h new file mode 100644 index 0000000000..0dc518d51b --- /dev/null +++ b/library/cpp/regex/glob/glob_compat.h @@ -0,0 +1,73 @@ +#pragma once + +#include <util/system/defaults.h> + +#if defined(_MSC_VER) || defined(_bionic_) +#define USE_INTERNAL_GLOB +#endif + +#if !defined(USE_INTERNAL_GLOB) +#include <glob.h> +#else + +struct stat; +typedef struct { + int gl_pathc; /* Count of total paths so far. */ + int gl_matchc; /* Count of paths matching pattern. */ + int gl_offs; /* Reserved at beginning of gl_pathv. */ + int gl_flags; /* Copy of flags parameter to glob. */ + char** gl_pathv; /* List of paths matching pattern. */ + /* Copy of errfunc parameter to glob. */ + int (*gl_errfunc)(const char*, int); + + /* + * Alternate filesystem access methods for glob; replacement + * versions of closedir(3), readdir(3), opendir(3), stat(2) + * and lstat(2). + */ + void (*gl_closedir)(void*); + struct dirent* (*gl_readdir)(void*); + void* (*gl_opendir)(const char*); + int (*gl_lstat)(const char*, struct stat*); + int (*gl_stat)(const char*, struct stat*); +} glob_t; + +//#if __POSIX_VISIBLE >= 199209 +/* Believed to have been introduced in 1003.2-1992 */ +#define GLOB_APPEND 0x0001 /* Append to output from previous call. */ +#define GLOB_DOOFFS 0x0002 /* Use gl_offs. */ +#define GLOB_ERR 0x0004 /* Return on error. */ +#define GLOB_MARK 0x0008 /* Append / to matching directories. */ +#define GLOB_NOCHECK 0x0010 /* Return pattern itself if nothing matches. */ +#define GLOB_NOSORT 0x0020 /* Don't sort. */ +#define GLOB_NOESCAPE 0x2000 /* Disable backslash escaping. */ + +/* Error values returned by glob(3) */ +#define GLOB_NOSPACE (-1) /* Malloc call failed. */ +#define GLOB_ABORTED (-2) /* Unignored error. */ +#define GLOB_NOMATCH (-3) /* No match and GLOB_NOCHECK was not set. */ +#define GLOB_NOSYS (-4) /* Obsolete: source compatibility only. 
*/ +//#endif /* __POSIX_VISIBLE >= 199209 */ + +//#if __BSD_VISIBLE +#define GLOB_ALTDIRFUNC 0x0040 /* Use alternately specified directory funcs. */ +#define GLOB_BRACE 0x0080 /* Expand braces ala csh. */ +#define GLOB_MAGCHAR 0x0100 /* Pattern had globbing characters. */ +#define GLOB_NOMAGIC 0x0200 /* GLOB_NOCHECK without magic chars (csh). */ +#define GLOB_QUOTE 0x0400 /* Quote special chars with \. */ +#define GLOB_TILDE 0x0800 /* Expand tilde names from the passwd file. */ +#define GLOB_LIMIT 0x1000 /* limit number of returned paths */ + +/* source compatibility, these are the old names */ +#define GLOB_MAXPATH GLOB_LIMIT +#define GLOB_ABEND GLOB_ABORTED +//#endif /* __BSD_VISIBLE */ + +int glob(const char*, int, int (*)(const char*, int), glob_t*); +void globfree(glob_t*); + +#endif /* _MSC_VER */ + +#if !defined(FROM_IMPLEMENTATION) +#undef USE_INTERNAL_GLOB +#endif diff --git a/library/cpp/regex/glob/glob_iterator.cpp b/library/cpp/regex/glob/glob_iterator.cpp new file mode 100644 index 0000000000..746b49f397 --- /dev/null +++ b/library/cpp/regex/glob/glob_iterator.cpp @@ -0,0 +1 @@ +#include "glob_iterator.h" diff --git a/library/cpp/regex/glob/glob_iterator.h b/library/cpp/regex/glob/glob_iterator.h new file mode 100644 index 0000000000..e25481e594 --- /dev/null +++ b/library/cpp/regex/glob/glob_iterator.h @@ -0,0 +1,36 @@ +#pragma once + +#include "glob_compat.h" + +#include <util/generic/noncopyable.h> +#include <util/generic/string.h> +#include <util/generic/yexception.h> + +class TGlobPaths : TNonCopyable { +public: + TGlobPaths(const char* pattern) { + Impl.gl_pathc = 0; + int result = glob(pattern, 0, nullptr, &Impl); + Y_ENSURE(result == 0 || result == GLOB_NOMATCH, "glob failed"); + } + + TGlobPaths(const TString& pattern) + : TGlobPaths(pattern.data()) + { + } + + ~TGlobPaths() { + globfree(&Impl); + } + + const char** begin() { + return const_cast<const char**>(Impl.gl_pathv); + } + + const char** end() { + return const_cast<const 
char**>(Impl.gl_pathv + Impl.gl_pathc); + } + +private: + glob_t Impl; +}; diff --git a/library/cpp/regex/glob/ya.make b/library/cpp/regex/glob/ya.make new file mode 100644 index 0000000000..9379742d99 --- /dev/null +++ b/library/cpp/regex/glob/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( + glob.cpp + glob_iterator.cpp +) + +PEERDIR( + library/cpp/charset +) + +END() diff --git a/library/cpp/reverse_geocoder/CMakeLists.txt b/library/cpp/reverse_geocoder/CMakeLists.txt new file mode 100644 index 0000000000..621e95fdb2 --- /dev/null +++ b/library/cpp/reverse_geocoder/CMakeLists.txt @@ -0,0 +1,11 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(core) +add_subdirectory(library) +add_subdirectory(proto) diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.darwin-x86_64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..17f6e79c96 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,35 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-reverse_geocoder-core) +target_link_libraries(cpp-reverse_geocoder-core PUBLIC + contrib-libs-cxxsupp + yutil + cpp-reverse_geocoder-library + cpp-reverse_geocoder-proto + cpp-digest-crc32c +) +target_sources(cpp-reverse_geocoder-core PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp +) diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.linux-aarch64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..02361a0a1a --- /dev/null +++ b/library/cpp/reverse_geocoder/core/CMakeLists.linux-aarch64.txt @@ -0,0 +1,36 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. 
Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-reverse_geocoder-core) +target_link_libraries(cpp-reverse_geocoder-core PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-reverse_geocoder-library + cpp-reverse_geocoder-proto + cpp-digest-crc32c +) +target_sources(cpp-reverse_geocoder-core PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp +) diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.linux-x86_64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..02361a0a1a --- /dev/null +++ b/library/cpp/reverse_geocoder/core/CMakeLists.linux-x86_64.txt @@ -0,0 +1,36 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-reverse_geocoder-core) +target_link_libraries(cpp-reverse_geocoder-core PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-reverse_geocoder-library + cpp-reverse_geocoder-proto + cpp-digest-crc32c +) +target_sources(cpp-reverse_geocoder-core PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp +) diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.txt b/library/cpp/reverse_geocoder/core/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file 
was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/reverse_geocoder/core/CMakeLists.windows-x86_64.txt b/library/cpp/reverse_geocoder/core/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..17f6e79c96 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/CMakeLists.windows-x86_64.txt @@ -0,0 +1,35 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-reverse_geocoder-core) +target_link_libraries(cpp-reverse_geocoder-core PUBLIC + contrib-libs-cxxsupp + yutil + cpp-reverse_geocoder-library + cpp-reverse_geocoder-proto + cpp-digest-crc32c +) +target_sources(cpp-reverse_geocoder-core PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/area_box.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/bbox.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/common.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/edge.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/kv.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/location.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/part.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/point.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/polygon.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/region.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/debug.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/def.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/map.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp +) diff --git a/library/cpp/reverse_geocoder/core/area_box.cpp b/library/cpp/reverse_geocoder/core/area_box.cpp new file mode 100644 index 0000000000..67038fe4f8 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/area_box.cpp @@ -0,0 +1,9 @@ +#include "area_box.h" + +using namespace NReverseGeocoder; + +TRef NReverseGeocoder::LookupAreaBox(const TPoint& point) { + const TRef boxX = (point.X - NAreaBox::LowerX) / NAreaBox::DeltaX; + const TRef boxY = (point.Y - NAreaBox::LowerY) / NAreaBox::DeltaY; + return boxX * NAreaBox::NumberY + boxY; +} diff --git a/library/cpp/reverse_geocoder/core/area_box.h 
b/library/cpp/reverse_geocoder/core/area_box.h new file mode 100644 index 0000000000..1077a65fef --- /dev/null +++ b/library/cpp/reverse_geocoder/core/area_box.h @@ -0,0 +1,34 @@ +#pragma once + +#include "common.h" +#include "point.h" + +namespace NReverseGeocoder { + namespace NAreaBox { + const TCoordinate LowerX = ToCoordinate(-180.0); + const TCoordinate UpperX = ToCoordinate(180.0); + const TCoordinate LowerY = ToCoordinate(-90.0); + const TCoordinate UpperY = ToCoordinate(90.0); + const TCoordinate DeltaX = ToCoordinate(0.1); + const TCoordinate DeltaY = ToCoordinate(0.1); + const TCoordinate NumberX = (UpperX - LowerX) / DeltaX; + const TCoordinate NumberY = (UpperY - LowerY) / DeltaY; + const TCoordinate Number = NumberX * NumberY; + + } + + // Area of geo territory. Variable PolygonRefsOffset refers to the polygons lying inside this + // area. Geo map is divided into equal bounding boxes from (NAreaBox::LowerX, NAreaBox::LowerY) + // to (NAreaBox::UpperX, NAreaBox::UpperY) with DeltaX and DeltaY sizes. Logic of filling is in + // generator. + struct Y_PACKED TAreaBox { + TNumber PolygonRefsOffset; + TNumber PolygonRefsNumber; + }; + + static_assert(sizeof(TAreaBox) == 8, "NReverseGeocoder::TAreaBox size mismatch"); + + // Determine in wich area box in geoData is point. 
+ TRef LookupAreaBox(const TPoint& point); + +} diff --git a/library/cpp/reverse_geocoder/core/bbox.cpp b/library/cpp/reverse_geocoder/core/bbox.cpp new file mode 100644 index 0000000000..aa4258ac22 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/bbox.cpp @@ -0,0 +1 @@ +#include "bbox.h" diff --git a/library/cpp/reverse_geocoder/core/bbox.h b/library/cpp/reverse_geocoder/core/bbox.h new file mode 100644 index 0000000000..e8b6e00aa3 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/bbox.h @@ -0,0 +1,66 @@ +#pragma once + +#include "common.h" +#include "point.h" + +#include <util/generic/utility.h> + +namespace NReverseGeocoder { + struct Y_PACKED TBoundingBox { + TCoordinate X1; + TCoordinate Y1; + TCoordinate X2; + TCoordinate Y2; + + TBoundingBox() + : X1(0) + , Y1(0) + , X2(0) + , Y2(0) + { + } + + TBoundingBox(TCoordinate x1, TCoordinate y1, TCoordinate x2, TCoordinate y2) + : X1(x1) + , Y1(y1) + , X2(x2) + , Y2(y2) + { + } + + TBoundingBox(const TPoint* points, TNumber number) { + Init(); + for (TNumber i = 0; i < number; ++i) + Relax(points[i]); + } + + void Init() { + X1 = ToCoordinate(180.0); + Y1 = ToCoordinate(90.0); + X2 = ToCoordinate(-180.0); + Y2 = ToCoordinate(-90.0); + } + + void Relax(const TPoint& p) { + X1 = Min(X1, p.X); + Y1 = Min(Y1, p.Y); + X2 = Max(X2, p.X); + Y2 = Max(Y2, p.Y); + } + + bool HasIntersection(const TBoundingBox& r) const { + if (X1 > r.X2 || X2 < r.X1 || Y1 > r.Y2 || Y2 < r.Y1) + return false; + return true; + } + + bool Contains(const TPoint& p) const { + if (p.X < X1 || p.X > X2 || p.Y < Y1 || p.Y > Y2) + return false; + return true; + } + }; + + static_assert(sizeof(TBoundingBox) == 16, "NReverseGeocoder::TBoundingBox size mismatch"); + +} diff --git a/library/cpp/reverse_geocoder/core/common.cpp b/library/cpp/reverse_geocoder/core/common.cpp new file mode 100644 index 0000000000..67c02a20a0 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/common.cpp @@ -0,0 +1 @@ +#include "common.h" diff --git 
a/library/cpp/reverse_geocoder/core/common.h b/library/cpp/reverse_geocoder/core/common.h new file mode 100644 index 0000000000..090407ffd9 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/common.h @@ -0,0 +1,24 @@ +#pragma once + +#include <util/system/compiler.h> +#include <util/system/types.h> + +namespace NReverseGeocoder { + using TCoordinate = i32; + using TGeoId = ui64; + using TNumber = ui32; + using TRef = ui32; + using TSquare = i64; + using TVersion = ui64; + + const double EARTH_RADIUS = 6371000.0; + + inline TCoordinate ToCoordinate(double x) { + return x * 1e6; + } + + inline double ToDouble(TCoordinate x) { + return x / 1e6; + } + +} diff --git a/library/cpp/reverse_geocoder/core/edge.cpp b/library/cpp/reverse_geocoder/core/edge.cpp new file mode 100644 index 0000000000..86c6ab8535 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/edge.cpp @@ -0,0 +1 @@ +#include "edge.h" diff --git a/library/cpp/reverse_geocoder/core/edge.h b/library/cpp/reverse_geocoder/core/edge.h new file mode 100644 index 0000000000..9d20928857 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/edge.h @@ -0,0 +1,101 @@ +#pragma once + +#include "common.h" +#include "point.h" + +#include <util/generic/utility.h> +#include <util/system/yassert.h> + +namespace NReverseGeocoder { + // TEdge is a type, which represent polygon edge, Beg/End refers on begin/End edge points in + // geographical data. + struct Y_PACKED TEdge { + TRef Beg; + TRef End; + + TEdge() + : Beg(0) + , End(0) + { + } + + TEdge(const TRef& a, const TRef& b) + : Beg(a) + , End(b) + { + } + + bool operator==(const TEdge& e) const { + return Beg == e.Beg && End == e.End; + } + + bool operator!=(const TEdge& e) const { + return Beg != e.Beg || End != e.End; + } + + bool operator<(const TEdge& e) const { + return Beg < e.Beg || (Beg == e.Beg && End < e.End); + } + + // Checks that current edge is lying lower then other edge. Both edges must have a common X + // values, otherwise the behavior is undefined. 
+ bool Lower(const TEdge& e, const TPoint* points) const { + if (*this == e) + return false; + + const TPoint& a1 = points[Beg]; + const TPoint& a2 = points[End]; + const TPoint& b1 = points[e.Beg]; + const TPoint& b2 = points[e.End]; + + Y_ASSERT(a1.X <= a2.X && b1.X <= b2.X); + + if (a1 == b1) { + return (a2 - a1).Cross(b2 - a1) > 0; + } else if (a2 == b2) { + return (a1 - b1).Cross(b2 - b1) > 0; + } else if (b1.X >= a1.X && b1.X <= a2.X) { + return (a2 - a1).Cross(b1 - a1) > 0; + } else if (b2.X >= a1.X && b2.X <= a2.X) { + return (a2 - a1).Cross(b2 - a1) > 0; + } else if (a1.X >= b1.X && a1.X <= b2.X) { + return (a1 - b1).Cross(b2 - b1) > 0; + } else if (a2.X >= b1.X && a2.X <= b2.X) { + return (a2 - b1).Cross(b2 - b1) > 0; + } else { + return false; + } + } + + // Checks that current edge lying lower then given point. Edge and point must have a common X + // values, otherwise the behavior is undefined. + bool Lower(const TPoint& p, const TPoint* points) const { + if (Contains(p, points)) + return false; + + TPoint a = points[Beg]; + TPoint b = points[End]; + + if (a.X > b.X) + DoSwap(a, b); + + return (b - a).Cross(p - a) > 0; + } + + bool Contains(const TPoint& p, const TPoint* points) const { + TPoint a = points[Beg]; + TPoint b = points[End]; + + if (a.X > b.X) + DoSwap(a, b); + + if (p.X < a.X || p.X > b.X) + return false; + + return (b - a).Cross(p - a) == 0; + } + }; + + static_assert(sizeof(TEdge) == 8, "NReverseGeocoder::TEdge size mismatch"); + +} diff --git a/library/cpp/reverse_geocoder/core/geo_data/debug.cpp b/library/cpp/reverse_geocoder/core/geo_data/debug.cpp new file mode 100644 index 0000000000..4db0534b22 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/debug.cpp @@ -0,0 +1,74 @@ +#include "debug.h" + +#include <library/cpp/reverse_geocoder/library/log.h> +#include <library/cpp/reverse_geocoder/library/memory.h> + +using namespace NReverseGeocoder; +using namespace NGeoData; + +size_t NReverseGeocoder::NGeoData::Space(const 
IGeoData& g) { + size_t space = 0; + +#define GEO_BASE_DEF_VAR(TVar, Var) \ + space += sizeof(TVar); + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + space += sizeof(TNumber) + sizeof(TArr) * g.Arr##Number(); + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + return space; +} + +template <typename TArr> +static float ArraySpace(TNumber number) { + return number * sizeof(TArr) * 1.0 / MB; +} + +void NReverseGeocoder::NGeoData::Show(IOutputStream& out, const IGeoData& g) { + out << "GeoData = " << NGeoData::Space(g) * 1.0 / GB << " GB" << '\n'; + +#define GEO_BASE_DEF_VAR(TVar, Var) \ + out << " GeoData." << #Var << " = " << (unsigned long long)g.Var() << '\n'; + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + out << " GeoData." << #Arr << " = " \ + << g.Arr##Number() << " x " << sizeof(TArr) << " = " \ + << ArraySpace<TArr>(g.Arr##Number()) << " MB" \ + << '\n'; + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR +} + +template <typename TArr> +static bool Equals(const TArr* a, const TArr* b, size_t count) { + return !memcmp(a, b, sizeof(TArr) * count); +} + +bool NReverseGeocoder::NGeoData::Equals(const IGeoData& a, const IGeoData& b) { +#define GEO_BASE_DEF_VAR(TVar, Var) \ + if (a.Var() != b.Var()) { \ + LogError(#Var " not equal"); \ + return false; \ + } + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + GEO_BASE_DEF_VAR(TNumber, Arr##Number); \ + if (!::Equals(a.Arr(), b.Arr(), a.Arr##Number())) { \ + LogError(#Arr " not equal"); \ + return false; \ + } + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + return true; +} diff --git a/library/cpp/reverse_geocoder/core/geo_data/debug.h b/library/cpp/reverse_geocoder/core/geo_data/debug.h new file mode 100644 index 0000000000..e7a4d9029c --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/debug.h @@ -0,0 +1,16 @@ +#pragma once + +#include "geo_data.h" + +#include <util/stream/output.h> + +namespace NReverseGeocoder { + namespace NGeoData 
{ + size_t Space(const IGeoData& g); + + void Show(IOutputStream& out, const IGeoData& g); + + bool Equals(const IGeoData& a, const IGeoData& b); + + } +} diff --git a/library/cpp/reverse_geocoder/core/geo_data/def.cpp b/library/cpp/reverse_geocoder/core/geo_data/def.cpp new file mode 100644 index 0000000000..bb9f760d73 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/def.cpp @@ -0,0 +1 @@ +#include "def.h" diff --git a/library/cpp/reverse_geocoder/core/geo_data/def.h b/library/cpp/reverse_geocoder/core/geo_data/def.h new file mode 100644 index 0000000000..d3e331d873 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/def.h @@ -0,0 +1,35 @@ +#pragma once + +#include <library/cpp/reverse_geocoder/core/area_box.h> +#include <library/cpp/reverse_geocoder/core/common.h> +#include <library/cpp/reverse_geocoder/core/edge.h> +#include <library/cpp/reverse_geocoder/core/kv.h> +#include <library/cpp/reverse_geocoder/core/part.h> +#include <library/cpp/reverse_geocoder/core/point.h> +#include <library/cpp/reverse_geocoder/core/polygon.h> +#include <library/cpp/reverse_geocoder/core/region.h> + +namespace NReverseGeocoder { + const TVersion GEO_DATA_VERSION_0 = 0; + const TVersion GEO_DATA_VERSION_1 = 1; + + const TVersion GEO_DATA_CURRENT_VERSION = GEO_DATA_VERSION_1; + +// Geographical data definition. This define need for reflection in map/unmap, show, etc. 
+#define GEO_BASE_DEF_GEO_DATA \ + GEO_BASE_DEF_VAR(TVersion, Version); \ + GEO_BASE_DEF_ARR(TPoint, Points); \ + GEO_BASE_DEF_ARR(TEdge, Edges); \ + GEO_BASE_DEF_ARR(TRef, EdgeRefs); \ + GEO_BASE_DEF_ARR(TPart, Parts); \ + GEO_BASE_DEF_ARR(TPolygon, Polygons); \ + GEO_BASE_DEF_ARR(TRef, PolygonRefs); \ + GEO_BASE_DEF_ARR(TAreaBox, Boxes); \ + GEO_BASE_DEF_ARR(char, Blobs); \ + GEO_BASE_DEF_ARR(TKv, Kvs); \ + GEO_BASE_DEF_ARR(TRegion, Regions); \ + GEO_BASE_DEF_ARR(TRawPolygon, RawPolygons); \ + GEO_BASE_DEF_ARR(TRef, RawEdgeRefs); \ + // #define GEO_BASE_DEF_GEO_DATA + +} diff --git a/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp b/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp new file mode 100644 index 0000000000..be3310b291 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/geo_data.cpp @@ -0,0 +1 @@ +#include "geo_data.h" diff --git a/library/cpp/reverse_geocoder/core/geo_data/geo_data.h b/library/cpp/reverse_geocoder/core/geo_data/geo_data.h new file mode 100644 index 0000000000..7cb76bcddc --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/geo_data.h @@ -0,0 +1,24 @@ +#pragma once + +#include "def.h" + +namespace NReverseGeocoder { + class IGeoData { +#define GEO_BASE_DEF_VAR(TVar, Var) \ + virtual const TVar& Var() const = 0; + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + virtual const TArr* Arr() const = 0; \ + virtual TNumber Arr##Number() const = 0; + + public: + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + virtual ~IGeoData() { + } + }; + +} diff --git a/library/cpp/reverse_geocoder/core/geo_data/map.cpp b/library/cpp/reverse_geocoder/core/geo_data/map.cpp new file mode 100644 index 0000000000..312f7d7cb0 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/map.cpp @@ -0,0 +1,203 @@ +#include "map.h" + +#include <library/cpp/reverse_geocoder/library/log.h> +#include <library/cpp/reverse_geocoder/library/system.h> +#include <library/cpp/reverse_geocoder/proto/geo_data.pb.h> 
+ +#include <library/cpp/digest/crc32c/crc32c.h> + +#include <util/generic/algorithm.h> +#include <util/generic/buffer.h> +#include <util/generic/vector.h> +#include <util/network/address.h> +#include <util/system/filemap.h> +#include <util/system/unaligned_mem.h> + +using namespace NReverseGeocoder; + +static const TNumber CRC_SIZE = 3; + +void NReverseGeocoder::TGeoDataMap::Init() { +#define GEO_BASE_DEF_VAR(TVar, Var) \ + Var##_ = TVar(); + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + Arr##_ = nullptr; \ + Arr##Number_ = 0; + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR +} + +NReverseGeocoder::TGeoDataMap::TGeoDataMap() + : Data_(nullptr) + , Size_(0) +{ + Init(); +} + +static bool CheckMemoryConsistency(const NProto::TGeoData& g) { + TVector<std::pair<intptr_t, intptr_t>> segments; + +#define GEO_BASE_DEF_VAR(TVar, Var) \ + // undef + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + if (g.Get##Arr##Number() > 0) { \ + intptr_t const beg = g.Get##Arr(); \ + intptr_t const end = g.Get##Arr() + g.Get##Arr##Number() * sizeof(TArr); \ + segments.emplace_back(beg, end); \ + } + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + Sort(segments.begin(), segments.end()); + + for (size_t i = 0; i + 1 < segments.size(); ++i) + if (segments[i].second > segments[i + 1].first) + return false; + + return true; +} + +void NReverseGeocoder::TGeoDataMap::Remap() { + Init(); + + if (!Data_) + return; + + const ui64 headerSize = ntohl(ReadUnaligned<ui64>(Data_)); + + NProto::TGeoData header; + if (!header.ParseFromArray(Data_ + sizeof(ui64), headerSize)) + ythrow yexception() << "Unable parse geoData header"; + + if (header.GetMagic() != SYSTEM_ENDIAN_FLAG) + ythrow yexception() << "Different endianness in geoData and host"; + + if (!CheckMemoryConsistency(header)) + ythrow yexception() << "Memory is not consistent!"; + +#define GEO_BASE_DEF_VAR(TVar, Var) \ + Var##_ = header.Get##Var(); + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + 
GEO_BASE_DEF_VAR(TNumber, Arr##Number); \ + if (Arr##Number() > 0) { \ + const intptr_t offset = header.Get##Arr(); \ + Arr##_ = (TArr*)(((intptr_t)Data_) + offset); \ + const ui32 hash = Crc32c(Arr##_, std::min(Arr##Number_, CRC_SIZE) * sizeof(TArr)); \ + if (hash != header.Get##Arr##Crc32()) \ + ythrow yexception() << "Wrong crc32 for " << #Arr; \ + } + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + if (Version() != GEO_DATA_CURRENT_VERSION) + ythrow yexception() << "Unable use version " << Version() + << "(current version is " << GEO_DATA_CURRENT_VERSION << ")"; +} + +static size_t HeaderSize() { + NProto::TGeoData header; + header.SetMagic(std::numeric_limits<decltype(header.GetMagic())>::max()); + +#define GEO_BASE_DEF_VAR(TVar, Var) \ + header.Set##Var(std::numeric_limits<decltype(header.Get##Var())>::max()); + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + GEO_BASE_DEF_VAR(TNumber, Arr##Number); \ + header.Set##Arr(std::numeric_limits<decltype(header.Get##Arr())>::max()); \ + header.Set##Arr##Crc32(std::numeric_limits<decltype(header.Get##Arr##Crc32())>::max()); + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + return header.ByteSize(); +} + +static const char* Serialize(const IGeoData& g, TBlockAllocator* allocator, size_t* size) { + size_t const preAllocatedSize = allocator->TotalAllocatedSize(); + char* data = (char*)allocator->Allocate(HeaderSize() + sizeof(ui64)); + + NProto::TGeoData header; + header.SetMagic(SYSTEM_ENDIAN_FLAG); + +#define GEO_BASE_DEF_VAR(TVar, Var) \ + header.Set##Var(g.Var()); + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + GEO_BASE_DEF_VAR(TNumber, Arr##Number); \ + if (g.Arr##Number() > 0) { \ + TArr* arr = (TArr*)allocator->Allocate(sizeof(TArr) * g.Arr##Number()); \ + memcpy(arr, g.Arr(), sizeof(TArr) * g.Arr##Number()); \ + header.Set##Arr((ui64)(((intptr_t)arr) - ((intptr_t)data))); \ + header.Set##Arr##Crc32(Crc32c(arr, std::min(g.Arr##Number(), CRC_SIZE) * 
sizeof(TArr))); \ + }; + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + const auto str = header.SerializeAsString(); + WriteUnaligned<ui64>(data, (ui64)htonl(str.size())); + memcpy(data + sizeof(ui64), str.data(), str.size()); + + if (size) + *size = allocator->TotalAllocatedSize() - preAllocatedSize; + + return data; +} + +static size_t TotalByteSize(const IGeoData& g) { + size_t total_size = TBlockAllocator::AllocateSize(HeaderSize() + sizeof(ui64)); + +#define GEO_BASE_DEF_VAR(TVar, Var) \ + // undef + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ + total_size += TBlockAllocator::AllocateSize(sizeof(TArr) * g.Arr##Number()); + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + return total_size; +} + +NReverseGeocoder::TGeoDataMap::TGeoDataMap(const IGeoData& geoData, TBlockAllocator* allocator) + : TGeoDataMap() +{ + Data_ = Serialize(geoData, allocator, &Size_); + Remap(); +} + +void NReverseGeocoder::TGeoDataMap::SerializeToFile(const TString& path, const IGeoData& data) { + TBlob data_blob = SerializeToBlob(data); + + TFile file(path, CreateAlways | RdWr); + file.Write(data_blob.Data(), data_blob.Length()); +} + +TBlob NReverseGeocoder::TGeoDataMap::SerializeToBlob(const IGeoData& data) { + TBuffer buf; + buf.Resize(TotalByteSize(data)); + memset(buf.data(), 0, buf.size()); + + TBlockAllocator allocator(buf.Data(), buf.Size()); + TGeoDataMap(data, &allocator); + + return TBlob::FromBuffer(buf); +} diff --git a/library/cpp/reverse_geocoder/core/geo_data/map.h b/library/cpp/reverse_geocoder/core/geo_data/map.h new file mode 100644 index 0000000000..e466bd912e --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/map.h @@ -0,0 +1,89 @@ +#pragma once + +#include "geo_data.h" + +#include <library/cpp/reverse_geocoder/library/block_allocator.h> + +#include <util/memory/blob.h> + +namespace NReverseGeocoder { + class TGeoDataMap: public IGeoData, public TNonCopyable { +#define GEO_BASE_DEF_VAR(TVar, 
Var) \ +public: \ + const TVar& Var() const override { \ + return Var##_; \ + } \ + \ +private: \ + TVar Var##_; + +#define GEO_BASE_DEF_ARR(TArr, Arr) \ +public: \ + const TArr* Arr() const override { \ + return Arr##_; \ + } \ + TNumber Arr##Number() const override { \ + return Arr##Number_; \ + } \ + \ +private: \ + TNumber Arr##Number_; \ + const TArr* Arr##_; + + GEO_BASE_DEF_GEO_DATA + +#undef GEO_BASE_DEF_VAR +#undef GEO_BASE_DEF_ARR + + public: + TGeoDataMap(); + + static void SerializeToFile(const TString& path, const IGeoData& data); + + static TBlob SerializeToBlob(const IGeoData& data); + + TGeoDataMap(const IGeoData& data, TBlockAllocator* allocator); + + TGeoDataMap(const char* data, size_t size) + : TGeoDataMap() + { + Data_ = data; + Size_ = size; + Remap(); + } + + TGeoDataMap(TGeoDataMap&& dat) + : TGeoDataMap() + { + DoSwap(Data_, dat.Data_); + DoSwap(Size_, dat.Size_); + Remap(); + dat.Remap(); + } + + TGeoDataMap& operator=(TGeoDataMap&& dat) { + DoSwap(Data_, dat.Data_); + DoSwap(Size_, dat.Size_); + Remap(); + dat.Remap(); + return *this; + } + + const char* Data() const { + return Data_; + } + + size_t Size() const { + return Size_; + } + + private: + void Init(); + + void Remap(); + + const char* Data_; + size_t Size_; + }; + +} diff --git a/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp b/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp new file mode 100644 index 0000000000..5ff2d13783 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/proxy.cpp @@ -0,0 +1 @@ +#include "proxy.h" diff --git a/library/cpp/reverse_geocoder/core/geo_data/proxy.h b/library/cpp/reverse_geocoder/core/geo_data/proxy.h new file mode 100644 index 0000000000..fecb9fc7cf --- /dev/null +++ b/library/cpp/reverse_geocoder/core/geo_data/proxy.h @@ -0,0 +1,68 @@ +#pragma once + +#include "geo_data.h" +#include "map.h" + +#include <util/generic/ptr.h> +#include <util/system/filemap.h> + +namespace NReverseGeocoder { + class IGeoDataProxy { + public: + 
virtual const IGeoData* GeoData() const = 0; + + virtual ~IGeoDataProxy() { + } + }; + + using TGeoDataProxyPtr = THolder<IGeoDataProxy>; + + class TGeoDataMapProxy: public IGeoDataProxy, public TNonCopyable { + public: + explicit TGeoDataMapProxy(const char* path) + : MemFile_(path) + { + MemFile_.Map(0, MemFile_.Length()); + GeoData_ = TGeoDataMap((const char*)MemFile_.Ptr(), MemFile_.MappedSize()); + } + + const IGeoData* GeoData() const override { + return &GeoData_; + } + + private: + TFileMap MemFile_; + TGeoDataMap GeoData_; + }; + + class TGeoDataWrapper: public IGeoDataProxy, public TNonCopyable { + public: + explicit TGeoDataWrapper(const IGeoData& g) + : GeoData_(&g) + { + } + + const IGeoData* GeoData() const override { + return GeoData_; + } + + private: + const IGeoData* GeoData_; + }; + + class TGeoDataRawProxy: public IGeoDataProxy, public TNonCopyable { + public: + TGeoDataRawProxy(const char* data, size_t dataSize) + : GeoData_(data, dataSize) + { + } + + const IGeoData* GeoData() const override { + return &GeoData_; + } + + private: + TGeoDataMap GeoData_; + }; + +} diff --git a/library/cpp/reverse_geocoder/core/kv.cpp b/library/cpp/reverse_geocoder/core/kv.cpp new file mode 100644 index 0000000000..a48e9c947e --- /dev/null +++ b/library/cpp/reverse_geocoder/core/kv.cpp @@ -0,0 +1 @@ +#include "kv.h" diff --git a/library/cpp/reverse_geocoder/core/kv.h b/library/cpp/reverse_geocoder/core/kv.h new file mode 100644 index 0000000000..639c21de52 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/kv.h @@ -0,0 +1,13 @@ +#pragma once + +#include "common.h" + +namespace NReverseGeocoder { + // k and v is offsets on blobs in geographical data blobs array. See geo_data.h + // for details. 
+ struct TKv { + TNumber K; + TNumber V; + }; + +} diff --git a/library/cpp/reverse_geocoder/core/location.cpp b/library/cpp/reverse_geocoder/core/location.cpp new file mode 100644 index 0000000000..b2d2f54d12 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/location.cpp @@ -0,0 +1 @@ +#include "location.h" diff --git a/library/cpp/reverse_geocoder/core/location.h b/library/cpp/reverse_geocoder/core/location.h new file mode 100644 index 0000000000..5aa3198684 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/location.h @@ -0,0 +1,21 @@ +#pragma once + +namespace NReverseGeocoder { + struct TLocation { + double Lon; + double Lat; + + TLocation() + : Lon(0) + , Lat(0) + { + } + + TLocation(double lon, double lat) + : Lon(lon) + , Lat(lat) + { + } + }; + +} diff --git a/library/cpp/reverse_geocoder/core/part.cpp b/library/cpp/reverse_geocoder/core/part.cpp new file mode 100644 index 0000000000..c973d2171a --- /dev/null +++ b/library/cpp/reverse_geocoder/core/part.cpp @@ -0,0 +1,29 @@ +#include "part.h" + +#include <library/cpp/reverse_geocoder/library/unaligned_iter.h> + +#include <util/generic/algorithm.h> + +using namespace NReverseGeocoder; + +bool NReverseGeocoder::TPart::Contains(const TPoint& point, TNumber edgeRefsNumber, const TRef* edgeRefs, + const TEdge* edges, const TPoint* points) const { + auto edgeRefsBegin = UnalignedIter(edgeRefs) + EdgeRefsOffset; + auto edgeRefsEnd = edgeRefsBegin + edgeRefsNumber; + + // Find lower bound edge, which lying below given point. + auto cmp = [&](const TRef& e, const TPoint& p) { + return edges[e].Lower(p, points); + }; + + auto edgeRef = LowerBound(edgeRefsBegin, edgeRefsEnd, point, cmp); + + if (edgeRef == edgeRefsEnd) + return false; + + if (edges[*edgeRef].Contains(point, points)) + return true; + + // If the point is inside of the polygon then it will intersect the edge an odd number of times. 
+ return (edgeRef - edgeRefsBegin) % 2 == 1; +} diff --git a/library/cpp/reverse_geocoder/core/part.h b/library/cpp/reverse_geocoder/core/part.h new file mode 100644 index 0000000000..9b24fee96f --- /dev/null +++ b/library/cpp/reverse_geocoder/core/part.h @@ -0,0 +1,26 @@ +#pragma once + +#include "common.h" +#include "edge.h" +#include "point.h" + +namespace NReverseGeocoder { + // TPart contains version of persistent scanline. Parts lying in geofraphical data parts array, + // ordered by Coordinate for each polygon. Variable EdgeRefsOffset refers on EdgeRefs array for + // this part. For optimal usage of memory, part does not contain "EdgeRefsNumber" variable, because + // it's can be computed as parts[i + 1].EdgeRefsOffset - parts[i].EdgeRefsOffset for every part + // in geographical data. Especially for this, added fake part into IGeoData with correct + // EdgeRefsOffset. Refs in EdgeRefs are in increasing order for each part. It is necessary to + // quickly determine how many edges is under the point. See generator/ for details. + struct Y_PACKED TPart { + TCoordinate Coordinate; + TNumber EdgeRefsOffset; + + // Checks point lying under odd numbers of edges or on edge. 
+ bool Contains(const TPoint& point, TNumber edgeRefsNumber, const TRef* edgeRefs, + const TEdge* edges, const TPoint* points) const; + }; + + static_assert(sizeof(TPart) == 8, "NReverseGeocoder::TPart size mismatch"); + +} diff --git a/library/cpp/reverse_geocoder/core/point.cpp b/library/cpp/reverse_geocoder/core/point.cpp new file mode 100644 index 0000000000..396e27e596 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/point.cpp @@ -0,0 +1 @@ +#include "point.h" diff --git a/library/cpp/reverse_geocoder/core/point.h b/library/cpp/reverse_geocoder/core/point.h new file mode 100644 index 0000000000..75f1dfc1b4 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/point.h @@ -0,0 +1,52 @@ +#pragma once + +#include "common.h" +#include "location.h" + +namespace NReverseGeocoder { + struct Y_PACKED TPoint { + TCoordinate X; + TCoordinate Y; + + TPoint() + : X(0) + , Y(0) + { + } + + TPoint(const TCoordinate& x1, const TCoordinate& y1) + : X(x1) + , Y(y1) + { + } + + explicit TPoint(const TLocation& l) + : X(ToCoordinate(l.Lon)) + , Y(ToCoordinate(l.Lat)) + { + } + + TPoint operator-(const TPoint& p) const { + return TPoint(X - p.X, Y - p.Y); + } + + bool operator==(const TPoint& b) const { + return X == b.X && Y == b.Y; + } + + bool operator!=(const TPoint& b) const { + return X != b.X || Y != b.Y; + } + + bool operator<(const TPoint& b) const { + return X < b.X || (X == b.X && Y < b.Y); + } + + TSquare Cross(const TPoint& p) const { + return 1ll * X * p.Y - 1ll * Y * p.X; + } + }; + + static_assert(sizeof(TPoint) == 8, "NReverseGeocoder::TPoint size mismatch"); + +} diff --git a/library/cpp/reverse_geocoder/core/polygon.cpp b/library/cpp/reverse_geocoder/core/polygon.cpp new file mode 100644 index 0000000000..2baac2d229 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/polygon.cpp @@ -0,0 +1,91 @@ +#include "polygon.h" + +#include <util/generic/algorithm.h> + +using namespace NReverseGeocoder; + +static bool Check(const TPart* part, const TPoint& point, 
const TRef* edgeRefs, + const TEdge* edges, const TPoint* points) { + const TNumber edgeRefsNumber = (part + 1)->EdgeRefsOffset - part->EdgeRefsOffset; + return part->Contains(point, edgeRefsNumber, edgeRefs, edges, points); +} + +bool NReverseGeocoder::TPolygon::Contains(const TPoint& point, const TPart* parts, const TRef* edgeRefs, + const TEdge* edges, const TPoint* points) const { + if (!Bbox.Contains(point)) + return false; + + parts += PartsOffset; + const TPart* partsEnd = parts + PartsNumber; + + // Find lower bound part, which can contains given point. + const TPart* part = LowerBound(parts, partsEnd, point, [&](const TPart& a, const TPoint& b) { + return a.Coordinate < b.X; + }); + + if (part->Coordinate > point.X) { + if (part == parts) + return false; + --part; + } + + if (point.X < part->Coordinate || point.X > (part + 1)->Coordinate) + return false; + + if (point.X == part->Coordinate) + if (part != parts && Check(part - 1, point, edgeRefs, edges, points)) + return true; + + return Check(part, point, edgeRefs, edges, points); +} + +bool NReverseGeocoder::TPolygonBase::Better(const TPolygonBase& p, const TRegion* regions, + TNumber regionsNumber) const { + if (Square < p.Square) + return true; + + if (Square == p.Square) { + const TRegion* begin = regions; + const TRegion* end = regions + regionsNumber; + + const TRegion* r1 = LowerBound(begin, end, TGeoId(RegionId)); + const TRegion* r2 = LowerBound(begin, end, TGeoId(p.RegionId)); + + if (r1 == end || r1->RegionId != RegionId) + return false; + + if (r2 == end || r2->RegionId != p.RegionId) + return false; + + return r1->Better(*r2); + } + + return false; +} + +bool NReverseGeocoder::TRawPolygon::Contains(const TPoint& point, const TRef* edgeRefs, const TEdge* edges, + const TPoint* points) const { + if (!Bbox.Contains(point)) + return false; + + edgeRefs += EdgeRefsOffset; + + TNumber intersections = 0; + for (TNumber i = 0; i < EdgeRefsNumber; ++i) { + const TEdge& e = edges[edgeRefs[i]]; + + if 
(e.Contains(point, points)) + return true; + + TPoint a = points[e.Beg]; + TPoint b = points[e.End]; + + if (a.X > b.X) + DoSwap(a, b); + + if (a.X < point.X && b.X >= point.X && e.Lower(point, points)) + ++intersections; + } + + return intersections % 2 == 1; +} diff --git a/library/cpp/reverse_geocoder/core/polygon.h b/library/cpp/reverse_geocoder/core/polygon.h new file mode 100644 index 0000000000..065bba1e38 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/polygon.h @@ -0,0 +1,73 @@ +#pragma once + +#include "bbox.h" +#include "common.h" +#include "edge.h" +#include "part.h" +#include "point.h" +#include "region.h" + +namespace NReverseGeocoder { +#pragma pack(push, 1) + + struct TPolygonBase { + enum EType { + TYPE_UNKNOWN = 0, + TYPE_INNER = 1, + TYPE_OUTER = 2, + }; + + // If the type is TYPE_INNER and the polygon contains the given point, the region with RegionId + // does not contain the point. + EType Type; + + ui32 Unused1; + + // Geographical data identifiers. + TGeoId RegionId; + TGeoId PolygonId; + + // Bounding rectangle in which the polygon lies. + TBoundingBox Bbox; + + // Square of the polygon. Needed to determine which polygon is better. See the Better member function. + TSquare Square; + + // Total number of points in the polygon. + TNumber PointsNumber; + + // Checks whether this polygon is better than the given polygon, which means that this polygon lies + // deeper than the given one in the polygon hierarchy. + bool Better(const TPolygonBase& p, const TRegion* regions, TNumber regionsNumber) const; + }; + + // Polygon is a representation of the persistent scanline data structure. + struct TPolygon: public TPolygonBase { + // Versions of the persistent scanline. + TNumber PartsOffset; + TNumber PartsNumber; + ui32 Unused2; + + // Fast point-in-polygon test using the persistent scanline. You can see how this data structure is + // generated in generator/. 
+ bool Contains(const TPoint& point, const TPart* parts, const TRef* edgeRefs, + const TEdge* edges, const TPoint* points) const; + }; + + static_assert(sizeof(TPolygon) == 64, "NReverseGeocoder::TPolygon size mismatch"); + + // Raw polygon is a polygon representation for slow tests. + struct TRawPolygon: public TPolygonBase { + // Raw polygon edge refs. + TNumber EdgeRefsOffset; + TNumber EdgeRefsNumber; + ui32 Unused2; + + bool Contains(const TPoint& point, const TRef* edgeRefs, const TEdge* edges, + const TPoint* points) const; + }; + + static_assert(sizeof(TRawPolygon) == 64, "NReverseGeocoder::TRawPolygon size mismatch"); + +#pragma pack(pop) +} diff --git a/library/cpp/reverse_geocoder/core/region.cpp b/library/cpp/reverse_geocoder/core/region.cpp new file mode 100644 index 0000000000..62b4acd0a1 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/region.cpp @@ -0,0 +1 @@ +#include "region.h" diff --git a/library/cpp/reverse_geocoder/core/region.h b/library/cpp/reverse_geocoder/core/region.h new file mode 100644 index 0000000000..4b010c7103 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/region.h @@ -0,0 +1,37 @@ +#pragma once + +#include "common.h" + +namespace NReverseGeocoder { + struct Y_PACKED TRegion { + TGeoId RegionId; + TNumber KvsOffset; + TNumber KvsNumber; + TSquare Square; + TNumber PolygonsNumber; + ui32 Unused; + + bool operator==(const TRegion& r) const { + return RegionId == r.RegionId; + } + + bool operator<(const TRegion& r) const { + return RegionId < r.RegionId; + } + + bool operator<(const TGeoId& r) const { + return RegionId < r; + } + + friend bool operator<(const TGeoId& regionId, const TRegion& r) { + return regionId < r.RegionId; + } + + bool Better(const TRegion& r) const { + return Square < r.Square; + } + }; + + static_assert(sizeof(TRegion) == 32, "NReverseGeocoder::TRegion size mismatch"); + +} diff --git a/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp b/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp 
new file mode 100644 index 0000000000..d73e4f2648 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/reverse_geocoder.cpp @@ -0,0 +1,182 @@ +#include "reverse_geocoder.h" +#include "geo_data/geo_data.h" + +#include <library/cpp/reverse_geocoder/library/unaligned_iter.h> + +#include <util/generic/algorithm.h> +#include <util/system/unaligned_mem.h> + +using namespace NReverseGeocoder; + +static bool PolygonContains(const TPolygon& p, const TPoint& point, const IGeoData& geoData) { + const TPart* parts = geoData.Parts(); + const TRef* edgeRefs = geoData.EdgeRefs(); + const TEdge* edges = geoData.Edges(); + const TPoint* points = geoData.Points(); + return p.Contains(point, parts, edgeRefs, edges, points); +} + +template <typename TAnswer> +static void UpdateAnswer(const TAnswer** answer, const TAnswer& polygon, + const IGeoData& geoData) { + if (!*answer) { + *answer = &polygon; + } else { + const TRegion* regions = geoData.Regions(); + const TNumber regionsNumber = geoData.RegionsNumber(); + if (!(*answer)->Better(polygon, regions, regionsNumber)) + *answer = &polygon; + } +} + +static void SortDebug(TReverseGeocoder::TDebug* debug, const IGeoData& geoData) { + const TRegion* regions = geoData.Regions(); + const TNumber regionsNumber = geoData.RegionsNumber(); + + auto cmp = [&](const TGeoId& a, const TGeoId& b) { + const TRegion* r1 = LowerBound(regions, regions + regionsNumber, a); + const TRegion* r2 = LowerBound(regions, regions + regionsNumber, b); + return r1->Better(*r2); + }; + + Sort(debug->begin(), debug->end(), cmp); +} + +TGeoId NReverseGeocoder::TReverseGeocoder::Lookup(const TLocation& location, TDebug* debug) const { + const IGeoData& geoData = *GeoDataProxy_->GeoData(); + + if (debug) + debug->clear(); + + const TPoint point(location); + const TRef boxRef = LookupAreaBox(point); + + if (boxRef >= geoData.BoxesNumber()) + return UNKNOWN_GEO_ID; + + const TNumber refsOffset = geoData.Boxes()[boxRef].PolygonRefsOffset; + const TNumber refsNumber = 
geoData.Boxes()[boxRef].PolygonRefsNumber; + + const TPolygon* answer = nullptr; + + const TPolygon* p = geoData.Polygons(); + const auto refsBegin = UnalignedIter(geoData.PolygonRefs()) + refsOffset; + const auto refsEnd = refsBegin + refsNumber; + + for (auto iterL = refsBegin, iterR = refsBegin; iterL < refsEnd; iterL = iterR) { + iterR = iterL + 1; + + if (PolygonContains(p[*iterL], point, geoData)) { + if (p[*iterL].Type == TPolygon::TYPE_INNER) { + // All polygons with same RegionId must be skipped if polygon is inner. + // In geoData small inner polygons stored before big outer polygons. + while (iterR < refsEnd && p[*iterL].RegionId == p[*iterR].RegionId) + ++iterR; + + } else { + UpdateAnswer(&answer, p[*iterL], geoData); + + if (debug) + debug->push_back(p[*iterL].RegionId); + + while (iterR < refsEnd && p[*iterL].RegionId == p[*iterR].RegionId) + ++iterR; + } + } + } + + if (debug) + SortDebug(debug, geoData); + + return answer ? answer->RegionId : UNKNOWN_GEO_ID; +} + +TGeoId NReverseGeocoder::TReverseGeocoder::RawLookup(const TLocation& location, TDebug* debug) const { + const IGeoData& geoData = *GeoDataProxy_->GeoData(); + + if (debug) + debug->clear(); + + const TPoint point(location); + + const TRawPolygon* borders = geoData.RawPolygons(); + const TNumber bordersNumber = geoData.RawPolygonsNumber(); + + const TRawPolygon* answer = nullptr; + + TNumber i = 0; + while (i < bordersNumber) { + if (borders[i].Contains(point, geoData.RawEdgeRefs(), geoData.Edges(), geoData.Points())) { + if (borders[i].Type == TRawPolygon::TYPE_INNER) { + TNumber j = i + 1; + while (j < bordersNumber && borders[i].RegionId == borders[j].RegionId) + ++j; + + i = j; + + } else { + UpdateAnswer(&answer, borders[i], geoData); + + if (debug) + debug->push_back(borders[i].RegionId); + + TNumber j = i + 1; + while (j < bordersNumber && borders[i].RegionId == borders[j].RegionId) + ++j; + + i = j; + } + } else { + ++i; + } + } + + if (debug) + SortDebug(debug, geoData); + + 
return answer ? answer->RegionId : UNKNOWN_GEO_ID; +} + +bool NReverseGeocoder::TReverseGeocoder::EachKv(TGeoId regionId, TKvCallback callback) const { + const IGeoData& g = *GeoDataProxy_->GeoData(); + + const TRegion* begin = g.Regions(); + const TRegion* end = begin + g.RegionsNumber(); + + const TRegion* region = LowerBound(begin, end, regionId); + + if (region == end || region->RegionId != regionId) + return false; + + const TKv* kvs = g.Kvs() + region->KvsOffset; + const char* blobs = g.Blobs(); + + for (TNumber i = 0; i < region->KvsNumber; ++i) { + const char* k = blobs + kvs[i].K; + const char* v = blobs + kvs[i].V; + callback(k, v); + } + + return true; +} + +void NReverseGeocoder::TReverseGeocoder::EachPolygon(TPolygonCallback callback) const { + const IGeoData& g = *GeoDataProxy_->GeoData(); + + for (TNumber i = 0; i < g.PolygonsNumber(); ++i) + callback(g.Polygons()[i]); +} + +void NReverseGeocoder::TReverseGeocoder::EachPart(const TPolygon& polygon, TPartCallback callback) const { + const IGeoData& g = *GeoDataProxy_->GeoData(); + + const TNumber partsOffset = polygon.PartsOffset; + const TNumber partsNumber = polygon.PartsNumber; + + for (TNumber i = partsOffset; i < partsOffset + partsNumber; ++i) { + const TPart& part = g.Parts()[i]; + const TPart& npart = g.Parts()[i + 1]; + const TNumber edgeRefsNumber = npart.EdgeRefsOffset - part.EdgeRefsOffset; + callback(part, edgeRefsNumber); + } +} diff --git a/library/cpp/reverse_geocoder/core/reverse_geocoder.h b/library/cpp/reverse_geocoder/core/reverse_geocoder.h new file mode 100644 index 0000000000..c74eddb40e --- /dev/null +++ b/library/cpp/reverse_geocoder/core/reverse_geocoder.h @@ -0,0 +1,73 @@ +#pragma once + +#include "common.h" +#include "geo_data/geo_data.h" +#include "geo_data/proxy.h" + +#include <util/generic/noncopyable.h> +#include <util/generic/vector.h> + +#include <functional> + +namespace NReverseGeocoder { + const TGeoId UNKNOWN_GEO_ID = static_cast<TGeoId>(-1); + + // NOTE: Be 
careful! It works fine and fast on a real-world dataset. + // But in theory it can spend O(n^2) memory (on a real-world dataset it's just 6n). + // The point-in-polygon test is always O(log n). Memory usage will be O(n) in the future! + class TReverseGeocoder: public TNonCopyable { + public: + using TDebug = TVector<TGeoId>; + using TKvCallback = std::function<void(const char*, const char*)>; + using TPolygonCallback = std::function<void(const TPolygon&)>; + using TPartCallback = std::function<void(const TPart&, TNumber)>; + + TReverseGeocoder() + : GeoDataProxy_() + { + } + + TReverseGeocoder(TReverseGeocoder&& g) + : GeoDataProxy_() + { + DoSwap(GeoDataProxy_, g.GeoDataProxy_); + } + + TReverseGeocoder& operator=(TReverseGeocoder&& g) { + DoSwap(GeoDataProxy_, g.GeoDataProxy_); + return *this; + } + + explicit TReverseGeocoder(const char* path) + : GeoDataProxy_(new TGeoDataMapProxy(path)) + { + } + + explicit TReverseGeocoder(const IGeoData& geoData) + : GeoDataProxy_(new TGeoDataWrapper(geoData)) + { + } + + TReverseGeocoder(const char* data, size_t dataSize) + : GeoDataProxy_(new TGeoDataRawProxy(data, dataSize)) + { + } + + TGeoId Lookup(const TLocation& location, TDebug* debug = nullptr) const; + + TGeoId RawLookup(const TLocation& location, TDebug* debug = nullptr) const; + + bool EachKv(TGeoId regionId, TKvCallback callback) const; + + void EachPolygon(TPolygonCallback callback) const; + + void EachPart(const TPolygon& polygon, TPartCallback callback) const; + + const IGeoData& GeoData() const { + return *GeoDataProxy_->GeoData(); + } + + private: + TGeoDataProxyPtr GeoDataProxy_; + }; +} diff --git a/library/cpp/reverse_geocoder/core/ya.make b/library/cpp/reverse_geocoder/core/ya.make new file mode 100644 index 0000000000..9f7dc67464 --- /dev/null +++ b/library/cpp/reverse_geocoder/core/ya.make @@ -0,0 +1,28 @@ +LIBRARY() + +PEERDIR( + library/cpp/reverse_geocoder/library + library/cpp/reverse_geocoder/proto + library/cpp/digest/crc32c +) + +SRCS( + 
area_box.cpp + bbox.cpp + common.cpp + edge.cpp + reverse_geocoder.cpp + kv.cpp + location.cpp + part.cpp + point.cpp + polygon.cpp + region.cpp + geo_data/debug.cpp + geo_data/def.cpp + geo_data/geo_data.cpp + geo_data/map.cpp + geo_data/proxy.cpp +) + +END() diff --git a/library/cpp/reverse_geocoder/library/CMakeLists.darwin-x86_64.txt b/library/cpp/reverse_geocoder/library/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..f82b4b8cd1 --- /dev/null +++ b/library/cpp/reverse_geocoder/library/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,21 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-reverse_geocoder-library) +target_link_libraries(cpp-reverse_geocoder-library PUBLIC + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-reverse_geocoder-library PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/block_allocator.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/fs.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/log.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/pool_allocator.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/unaligned_iter.cpp +) diff --git a/library/cpp/reverse_geocoder/library/CMakeLists.linux-aarch64.txt b/library/cpp/reverse_geocoder/library/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..4b45fce452 --- /dev/null +++ b/library/cpp/reverse_geocoder/library/CMakeLists.linux-aarch64.txt @@ -0,0 +1,22 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-reverse_geocoder-library) +target_link_libraries(cpp-reverse_geocoder-library PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-reverse_geocoder-library PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/block_allocator.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/fs.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/log.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/pool_allocator.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/unaligned_iter.cpp +) diff --git a/library/cpp/reverse_geocoder/library/CMakeLists.linux-x86_64.txt b/library/cpp/reverse_geocoder/library/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..4b45fce452 --- /dev/null +++ b/library/cpp/reverse_geocoder/library/CMakeLists.linux-x86_64.txt @@ -0,0 +1,22 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-reverse_geocoder-library) +target_link_libraries(cpp-reverse_geocoder-library PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-reverse_geocoder-library PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/block_allocator.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/fs.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/log.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/pool_allocator.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/unaligned_iter.cpp +) diff --git a/library/cpp/reverse_geocoder/library/CMakeLists.txt b/library/cpp/reverse_geocoder/library/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/reverse_geocoder/library/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/reverse_geocoder/library/CMakeLists.windows-x86_64.txt b/library/cpp/reverse_geocoder/library/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..f82b4b8cd1 --- /dev/null +++ b/library/cpp/reverse_geocoder/library/CMakeLists.windows-x86_64.txt @@ -0,0 +1,21 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-reverse_geocoder-library) +target_link_libraries(cpp-reverse_geocoder-library PUBLIC + contrib-libs-cxxsupp + yutil +) +target_sources(cpp-reverse_geocoder-library PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/block_allocator.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/fs.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/log.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/pool_allocator.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/library/unaligned_iter.cpp +) diff --git a/library/cpp/reverse_geocoder/library/block_allocator.cpp b/library/cpp/reverse_geocoder/library/block_allocator.cpp new file mode 100644 index 0000000000..56f61dc566 --- /dev/null +++ b/library/cpp/reverse_geocoder/library/block_allocator.cpp @@ -0,0 +1,40 @@ +#include "block_allocator.h" + +using namespace NReverseGeocoder; + +static size_t const MEMORY_IS_USED_FLAG = ~0ull; +static size_t const SIZEOF_SIZE = AlignMemory(sizeof(size_t)); + +void* NReverseGeocoder::TBlockAllocator::Allocate(size_t number) { + number = AlignMemory(number); + if (BytesAllocated_ + number + SIZEOF_SIZE > BytesLimit_) + ythrow yexception() << "Unable allocate memory"; + char* begin = ((char*)Data_) + BytesAllocated_; + char* end = begin + number; + *((size_t*)end) = MEMORY_IS_USED_FLAG; + BytesAllocated_ += number + SIZEOF_SIZE; + return begin; +} + +size_t NReverseGeocoder::TBlockAllocator::AllocateSize(size_t number) { + return AlignMemory(number) + SIZEOF_SIZE; +} + +static void RelaxBlock(char* begin, size_t* number) { + while (*number > 0) { + char* ptr = begin + *number - SIZEOF_SIZE; + if (*((size_t*)ptr) == MEMORY_IS_USED_FLAG) + return; + *number -= *((size_t*)ptr) + SIZEOF_SIZE; + } +} + +void NReverseGeocoder::TBlockAllocator::Deallocate(void* ptr, size_t number) { + number = AlignMemory(number); + char* begin = (char*)ptr; + char* end = begin + number; + if (*((size_t*)end) != MEMORY_IS_USED_FLAG) + ythrow 
yexception() << "Trying to deallocate not allocated pointer " << ptr; + *((size_t*)end) = number; + RelaxBlock((char*)Data_, &BytesAllocated_); +} diff --git a/library/cpp/reverse_geocoder/library/block_allocator.h b/library/cpp/reverse_geocoder/library/block_allocator.h new file mode 100644 index 0000000000..1189d6b25c --- /dev/null +++ b/library/cpp/reverse_geocoder/library/block_allocator.h @@ -0,0 +1,64 @@ +#pragma once + +#include "memory.h" + +#include <util/generic/yexception.h> + +namespace NReverseGeocoder { + class TBlockAllocator: public TNonCopyable { + public: + TBlockAllocator() + : Data_(nullptr) + , BytesAllocated_(0) + , BytesLimit_(0) + { + } + + TBlockAllocator(void* data, size_t bytesLimit) + : Data_(data) + , BytesAllocated_(0) + , BytesLimit_(bytesLimit) + { + } + + TBlockAllocator(TBlockAllocator&& a) + : TBlockAllocator() + { + DoSwap(Data_, a.Data_); + DoSwap(BytesAllocated_, a.BytesAllocated_); + DoSwap(BytesLimit_, a.BytesLimit_); + } + + TBlockAllocator& operator=(TBlockAllocator&& a) { + DoSwap(Data_, a.Data_); + DoSwap(BytesAllocated_, a.BytesAllocated_); + DoSwap(BytesLimit_, a.BytesLimit_); + return *this; + } + + virtual ~TBlockAllocator() { + } + + virtual void* Allocate(size_t number); + + static size_t AllocateSize(size_t number); + + virtual void Deallocate(void* ptr, size_t number); + + size_t TotalAllocatedSize() const { + return BytesAllocated_; + } + + void Setup(void* data, size_t bytesLimit) { + Data_ = data; + BytesLimit_ = bytesLimit; + BytesAllocated_ = 0; + } + + private: + void* Data_; + size_t BytesAllocated_; + size_t BytesLimit_; + }; + +} diff --git a/library/cpp/reverse_geocoder/library/fs.cpp b/library/cpp/reverse_geocoder/library/fs.cpp new file mode 100644 index 0000000000..98c3b9ef81 --- /dev/null +++ b/library/cpp/reverse_geocoder/library/fs.cpp @@ -0,0 +1,18 @@ +#include "fs.h" + +#include <util/folder/dirut.h> +#include <util/string/split.h> + +namespace NReverseGeocoder { + TVector<TString> 
GetDataFilesList(const char* input) { + if (IsDir(input)) { + return GetFileListInDirectory<TVector<TString>>(input); + } + + TVector<TString> result; + for (const auto& partIt : StringSplitter(input).Split(',')) { + result.push_back(TString(partIt.Token())); + } + return result; + } +} diff --git a/library/cpp/reverse_geocoder/library/fs.h b/library/cpp/reverse_geocoder/library/fs.h new file mode 100644 index 0000000000..4435f960c8 --- /dev/null +++ b/library/cpp/reverse_geocoder/library/fs.h @@ -0,0 +1,19 @@ +#pragma once + +#include <util/folder/iterator.h> +#include <util/string/vector.h> + +namespace NReverseGeocoder { + template <typename Cont> + Cont GetFileListInDirectory(const char* dirName) { + TDirIterator dirIt(dirName, TDirIterator::TOptions(FTS_LOGICAL)); + Cont dirContent; + for (auto file = dirIt.begin(); file != dirIt.end(); ++file) { + if (strcmp(file->fts_path, dirName)) + dirContent.push_back(file->fts_path); + } + return dirContent; + } + + TVector<TString> GetDataFilesList(const char* input); +} diff --git a/library/cpp/reverse_geocoder/library/log.cpp b/library/cpp/reverse_geocoder/library/log.cpp new file mode 100644 index 0000000000..44e6ddf287 --- /dev/null +++ b/library/cpp/reverse_geocoder/library/log.cpp @@ -0,0 +1,111 @@ +#include "log.h" + +#include <util/datetime/systime.h> +#include <util/generic/yexception.h> +#include <util/system/guard.h> +#include <util/system/mutex.h> + +using namespace NReverseGeocoder; + +static size_t const TIMESTAMP_LIMIT = 32; + +class TLogger { +public: + static TLogger& Inst() { + static TLogger logger; + return logger; + } + + void Setup(IOutputStream& out, ELogLevel level) { + Out_ = &out; + Level_ = level; + } + + void Write(ELogLevel level, const char* message) { + if (level <= Level_) { + TGuard<TMutex> Lock(Lock_); + Out_->Write(message, strlen(message)); + } + } + + IOutputStream& OutputStream() const { + return *Out_; + } + + ELogLevel Level() const { + return Level_; + } + +private: + TLogger() 
+ : Out_() + , Level_(LOG_LEVEL_DISABLE) + { + } + + IOutputStream* Out_; + ELogLevel Level_; + TMutex Lock_; +}; + +ELogLevel NReverseGeocoder::LogLevel() { + return TLogger::Inst().Level(); +} + +void NReverseGeocoder::LogSetup(IOutputStream& out, ELogLevel level) { + TLogger::Inst().Setup(out, level); +} + +IOutputStream& NReverseGeocoder::LogOutputStream() { + return TLogger::Inst().OutputStream(); +} + +static const char* T(char* buffer) { + struct timeval timeVal; + gettimeofday(&timeVal, nullptr); + + struct tm timeInfo; + const time_t sec = timeVal.tv_sec; + localtime_r(&sec, &timeInfo); + + snprintf(buffer, TIMESTAMP_LIMIT, "%02d:%02d:%02d.%06d", + timeInfo.tm_hour, timeInfo.tm_min, timeInfo.tm_sec, (int)timeVal.tv_usec); + + return buffer; +} + +void NReverseGeocoder::LogWrite(ELogLevel level, const char* message) { + if (level > LogLevel()) + return; + + static const char* A[LOG_LEVEL_COUNT] = { + "", // LOG_LEVEL_DISABLE + "\033[90m", // LOG_LEVEL_ERROR + "\033[90m", // LOG_LEVEL_WARNING + "\033[90m", // LOG_LEVEL_INFO + "\033[90m", // LOG_LEVEL_DEBUG + }; + + static const char* B[LOG_LEVEL_COUNT] = { + "", // LOG_LEVEL_DISABLE + "\033[31;1mError\033[0m", // LOG_LEVEL_ERROR + "\033[33;1mWarn\033[0m", // LOG_LEVEL_WARNING + "\033[32;1mInfo\033[0m", // LOG_LEVEL_INFO + "Debug", // LOG_LEVEL_DEBUG + }; + + static const char* C[LOG_LEVEL_COUNT] = { + "", // LOG_LEVEL_DISABLE + "\n", // LOG_LEVEL_ERROR + "\n", // LOG_LEVEL_WARNING + "\n", // LOG_LEVEL_INFO + "\033[0m\n", // LOG_LEVEL_DEBUG + }; + + char buffer[LOG_MESSAGE_LIMIT], tbuffer[TIMESTAMP_LIMIT]; + // Ignore logger snprintf errors. 
+ snprintf(buffer, LOG_MESSAGE_LIMIT, "%s(%s) %s: %s%s", + A[level], T(tbuffer), B[level], message, C[level]); + + TLogger::Inst().Write(level, buffer); +} diff --git a/library/cpp/reverse_geocoder/library/log.h b/library/cpp/reverse_geocoder/library/log.h new file mode 100644 index 0000000000..44cb0cefcf --- /dev/null +++ b/library/cpp/reverse_geocoder/library/log.h @@ -0,0 +1,65 @@ +#pragma once + +#include <util/generic/yexception.h> +#include <util/stream/output.h> + +#include <cstdio> + +namespace NReverseGeocoder { + size_t const LOG_MESSAGE_LIMIT = 1024; + + enum ELogLevel { + LOG_LEVEL_DISABLE = 0, + LOG_LEVEL_ERROR, + LOG_LEVEL_WARNING, + LOG_LEVEL_INFO, + LOG_LEVEL_DEBUG, + LOG_LEVEL_COUNT + }; + + // Init logger. Setup OutputStream and logger level. + void LogSetup(IOutputStream& out, ELogLevel level); + + // Write log message with colors, level and current time. + // Example: + // (13:24:11.123456) Info: Good job! + // (13:24:11.323456) Warn: Ooops :( + // (13:24:22.456789) Error: Hello, world! + void LogWrite(ELogLevel level, const char* message); + + // Log output file descriptor. + IOutputStream& LogOutputStream(); + + // Current log level. + ELogLevel LogLevel(); + + template <typename... TArgs> + void LogWrite(ELogLevel level, const char* fmt, TArgs... args) { + if (level <= LogLevel()) { + char buffer[LOG_MESSAGE_LIMIT]; + // Ignore logger snprintf errors. + snprintf(buffer, LOG_MESSAGE_LIMIT, fmt, std::forward<TArgs>(args)...); + LogWrite(level, buffer); + } + } + + template <typename... TArgs> + void LogError(TArgs... args) { + LogWrite(LOG_LEVEL_ERROR, std::forward<TArgs>(args)...); + } + + template <typename... TArgs> + void LogWarning(TArgs... args) { + LogWrite(LOG_LEVEL_WARNING, std::forward<TArgs>(args)...); + } + + template <typename... TArgs> + void LogInfo(TArgs... args) { + LogWrite(LOG_LEVEL_INFO, std::forward<TArgs>(args)...); + } + + template <typename... TArgs> + void LogDebug(TArgs... 
args) { + LogWrite(LOG_LEVEL_DEBUG, std::forward<TArgs>(args)...); + } +} diff --git a/library/cpp/reverse_geocoder/library/memory.h b/library/cpp/reverse_geocoder/library/memory.h new file mode 100644 index 0000000000..ecbe8bcd66 --- /dev/null +++ b/library/cpp/reverse_geocoder/library/memory.h @@ -0,0 +1,23 @@ +#pragma once + +#include <util/system/types.h> + +namespace NReverseGeocoder { + constexpr ui64 B = 1ull; + constexpr ui64 KB = 1024 * B; + constexpr ui64 MB = 1024 * KB; + constexpr ui64 GB = 1024 * MB; + + constexpr size_t MEMORY_ALIGNMENT = 16ull; + + inline unsigned long long AlignMemory(unsigned long long x) { + if (x % MEMORY_ALIGNMENT == 0) + return x; + return x + MEMORY_ALIGNMENT - x % MEMORY_ALIGNMENT; + } + + inline bool IsAlignedMemory(void* ptr) { + return ((uintptr_t)ptr) % MEMORY_ALIGNMENT == 0; + } + +} diff --git a/library/cpp/reverse_geocoder/library/pool_allocator.cpp b/library/cpp/reverse_geocoder/library/pool_allocator.cpp new file mode 100644 index 0000000000..0d841f7db0 --- /dev/null +++ b/library/cpp/reverse_geocoder/library/pool_allocator.cpp @@ -0,0 +1,17 @@ +#include "memory.h" +#include "pool_allocator.h" + +#include <util/generic/yexception.h> + +using namespace NReverseGeocoder; + +NReverseGeocoder::TPoolAllocator::TPoolAllocator(size_t poolSize) { + Ptr_ = new char[poolSize]; + Size_ = poolSize; + Setup(Ptr_, Size_); +} + +NReverseGeocoder::TPoolAllocator::~TPoolAllocator() { + if (Ptr_) + delete[] Ptr_; +} diff --git a/library/cpp/reverse_geocoder/library/pool_allocator.h b/library/cpp/reverse_geocoder/library/pool_allocator.h new file mode 100644 index 0000000000..f98bbcd3c1 --- /dev/null +++ b/library/cpp/reverse_geocoder/library/pool_allocator.h @@ -0,0 +1,42 @@ +#pragma once + +#include "block_allocator.h" + +#include <util/generic/utility.h> +#include <util/generic/noncopyable.h> + +namespace NReverseGeocoder { + class TPoolAllocator: public TBlockAllocator { + public: + TPoolAllocator() + : Ptr_(nullptr) + , Size_(0) + 
{ + } + + TPoolAllocator(TPoolAllocator&& a) + : TBlockAllocator(std::forward<TBlockAllocator>(a)) + , Ptr_(nullptr) + , Size_(0) + { + DoSwap(Ptr_, a.Ptr_); + DoSwap(Size_, a.Size_); + } + + TPoolAllocator& operator=(TPoolAllocator&& a) { + TBlockAllocator::operator=(std::forward<TBlockAllocator>(a)); + DoSwap(Ptr_, a.Ptr_); + DoSwap(Size_, a.Size_); + return *this; + } + + explicit TPoolAllocator(size_t poolSize); + + ~TPoolAllocator() override; + + private: + char* Ptr_; + size_t Size_; + }; + +} diff --git a/library/cpp/reverse_geocoder/library/system.h b/library/cpp/reverse_geocoder/library/system.h new file mode 100644 index 0000000000..499fb2bd91 --- /dev/null +++ b/library/cpp/reverse_geocoder/library/system.h @@ -0,0 +1,3 @@ +#pragma once + +#define SYSTEM_ENDIAN_FLAG (htonl(337)) diff --git a/library/cpp/reverse_geocoder/library/unaligned_iter.cpp b/library/cpp/reverse_geocoder/library/unaligned_iter.cpp new file mode 100644 index 0000000000..0322b677dc --- /dev/null +++ b/library/cpp/reverse_geocoder/library/unaligned_iter.cpp @@ -0,0 +1 @@ +#include "unaligned_iter.h" diff --git a/library/cpp/reverse_geocoder/library/unaligned_iter.h b/library/cpp/reverse_geocoder/library/unaligned_iter.h new file mode 100644 index 0000000000..827a3e2fd2 --- /dev/null +++ b/library/cpp/reverse_geocoder/library/unaligned_iter.h @@ -0,0 +1,64 @@ +#pragma once + +#include <util/system/unaligned_mem.h> +#include <iterator> + +namespace NReverseGeocoder { + /** + * Random-access iterator over a read-only memory range + * of trivially copyable items that may be not aligned properly. + * + * When dereferencing, a copy of item is returned, not a reference. + * Be sure that sizeof(T) is small enough. + * + * Iterator is useful for LowerBound/UpperBound STL algorithms. 
+ */ + template <class T> + class TUnalignedIter: public std::iterator<std::random_access_iterator_tag, T> { + public: + using TSelf = TUnalignedIter<T>; + + explicit TUnalignedIter(const T* ptr) + : Ptr(ptr) + { + } + + T operator*() const { + return ReadUnaligned<T>(Ptr); + } + + bool operator==(TSelf other) const { + return Ptr == other.Ptr; + } + + bool operator<(TSelf other) const { + return Ptr < other.Ptr; + } + + TSelf operator+(ptrdiff_t delta) const { + return TSelf{Ptr + delta}; + } + + ptrdiff_t operator-(TSelf other) const { + return Ptr - other.Ptr; + } + + TSelf& operator+=(ptrdiff_t delta) { + Ptr += delta; + return *this; + } + + TSelf& operator++() { + ++Ptr; + return *this; + } + + private: + const T* Ptr; + }; + + template <class T> + TUnalignedIter<T> UnalignedIter(const T* ptr) { + return TUnalignedIter<T>(ptr); + } +} diff --git a/library/cpp/reverse_geocoder/library/ya.make b/library/cpp/reverse_geocoder/library/ya.make new file mode 100644 index 0000000000..ec2eb205a8 --- /dev/null +++ b/library/cpp/reverse_geocoder/library/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + block_allocator.cpp + fs.cpp + log.cpp + pool_allocator.cpp + unaligned_iter.cpp +) + +END() diff --git a/library/cpp/reverse_geocoder/proto/CMakeLists.darwin-x86_64.txt b/library/cpp/reverse_geocoder/proto/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..8d1df0fdf8 --- /dev/null +++ b/library/cpp/reverse_geocoder/proto/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,56 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(cpp-reverse_geocoder-proto) +target_link_libraries(cpp-reverse_geocoder-proto PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(cpp-reverse_geocoder-proto PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/geo_data.proto + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/region.proto +) +target_proto_addincls(cpp-reverse_geocoder-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(cpp-reverse_geocoder-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/library/cpp/reverse_geocoder/proto/CMakeLists.linux-aarch64.txt b/library/cpp/reverse_geocoder/proto/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..b53c1692ee --- /dev/null +++ b/library/cpp/reverse_geocoder/proto/CMakeLists.linux-aarch64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(cpp-reverse_geocoder-proto) +target_link_libraries(cpp-reverse_geocoder-proto PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(cpp-reverse_geocoder-proto PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/geo_data.proto + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/region.proto +) +target_proto_addincls(cpp-reverse_geocoder-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(cpp-reverse_geocoder-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/library/cpp/reverse_geocoder/proto/CMakeLists.linux-x86_64.txt b/library/cpp/reverse_geocoder/proto/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..b53c1692ee --- /dev/null +++ b/library/cpp/reverse_geocoder/proto/CMakeLists.linux-x86_64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(cpp-reverse_geocoder-proto) +target_link_libraries(cpp-reverse_geocoder-proto PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(cpp-reverse_geocoder-proto PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/geo_data.proto + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/region.proto +) +target_proto_addincls(cpp-reverse_geocoder-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(cpp-reverse_geocoder-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/library/cpp/reverse_geocoder/proto/CMakeLists.txt b/library/cpp/reverse_geocoder/proto/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/reverse_geocoder/proto/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/reverse_geocoder/proto/CMakeLists.windows-x86_64.txt b/library/cpp/reverse_geocoder/proto/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..8d1df0fdf8 --- /dev/null +++ b/library/cpp/reverse_geocoder/proto/CMakeLists.windows-x86_64.txt @@ -0,0 +1,56 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(cpp-reverse_geocoder-proto) +target_link_libraries(cpp-reverse_geocoder-proto PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(cpp-reverse_geocoder-proto PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/geo_data.proto + ${CMAKE_SOURCE_DIR}/library/cpp/reverse_geocoder/proto/region.proto +) +target_proto_addincls(cpp-reverse_geocoder-proto + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(cpp-reverse_geocoder-proto + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/library/cpp/reverse_geocoder/proto/geo_data.proto b/library/cpp/reverse_geocoder/proto/geo_data.proto new file mode 100644 index 0000000000..00ecb48bec --- /dev/null +++ b/library/cpp/reverse_geocoder/proto/geo_data.proto @@ -0,0 +1,42 @@ +package NReverseGeocoder.NProto; + +message TGeoData { + required uint64 Magic = 1; + required uint64 Version = 2; + optional uint64 Points = 3; + optional uint64 PointsNumber = 4; + optional uint64 PointsCrc32 = 5; + optional uint64 Edges = 6; + optional uint64 EdgesNumber = 7; + optional uint64 EdgesCrc32 = 8; + optional uint64 EdgeRefs = 9; + optional uint64 EdgeRefsNumber = 10; + optional uint64 EdgeRefsCrc32 = 11; + optional uint64 Parts = 12; + optional uint64 PartsNumber = 13; + optional uint64 
PartsCrc32 = 14; + optional uint64 Polygons = 15; + optional uint64 PolygonsNumber = 16; + optional uint64 PolygonsCrc32 = 17; + optional uint64 PolygonRefs = 18; + optional uint64 PolygonRefsNumber = 19; + optional uint64 PolygonRefsCrc32 = 20; + optional uint64 Boxes = 21; + optional uint64 BoxesNumber = 22; + optional uint64 BoxesCrc32 = 23; + optional uint64 Blobs = 24; + optional uint64 BlobsNumber = 25; + optional uint64 BlobsCrc32 = 26; + optional uint64 Kvs = 27; + optional uint64 KvsNumber = 28; + optional uint64 KvsCrc32 = 29; + optional uint64 Regions = 30; + optional uint64 RegionsNumber = 31; + optional uint64 RegionsCrc32 = 32; + optional uint64 RawPolygons = 33; + optional uint64 RawPolygonsNumber = 34; + optional uint64 RawPolygonsCrc32 = 35; + optional uint64 RawEdgeRefs = 36; + optional uint64 RawEdgeRefsNumber = 37; + optional uint64 RawEdgeRefsCrc32 = 38; +}; diff --git a/library/cpp/reverse_geocoder/proto/region.proto b/library/cpp/reverse_geocoder/proto/region.proto new file mode 100644 index 0000000000..b782331628 --- /dev/null +++ b/library/cpp/reverse_geocoder/proto/region.proto @@ -0,0 +1,32 @@ +package NReverseGeocoder.NProto; + +message TLocation { + required double Lat = 1; + required double Lon = 2; +} + +message TPolygon { + required uint64 PolygonId = 1; + repeated TLocation Locations = 2; + + enum EType { + TYPE_UNKNOWN = 0; + TYPE_INNER = 1; + TYPE_OUTER = 2; + } + + required EType Type = 3; +} + +message TKv { + required string K = 1; + required string V = 2; +} + +message TRegion { + required uint64 RegionId = 1; + optional uint64 ParentId = 2; + repeated TPolygon Polygons = 3; + repeated TKv Kvs = 4; + repeated string Blobs = 5; +} diff --git a/library/cpp/reverse_geocoder/proto/ya.make b/library/cpp/reverse_geocoder/proto/ya.make new file mode 100644 index 0000000000..b6f7156210 --- /dev/null +++ b/library/cpp/reverse_geocoder/proto/ya.make @@ -0,0 +1,10 @@ +PROTO_LIBRARY() + +SRCS( + geo_data.proto + region.proto +) + 
+EXCLUDE_TAGS(GO_PROTO) + +END() diff --git a/library/cpp/robots_txt/CMakeLists.darwin-x86_64.txt b/library/cpp/robots_txt/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..408bf12f04 --- /dev/null +++ b/library/cpp/robots_txt/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,26 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(robotstxtcfg) + +add_library(library-cpp-robots_txt) +target_link_libraries(library-cpp-robots_txt PUBLIC + contrib-libs-cxxsupp + yutil + cpp-robots_txt-robotstxtcfg + library-cpp-case_insensitive_string + library-cpp-charset + cpp-string_utils-url + library-cpp-uri +) +target_sources(library-cpp-robots_txt PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree_rules_handler.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robots_txt_parser.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/rules_handler.cpp +) diff --git a/library/cpp/robots_txt/CMakeLists.linux-aarch64.txt b/library/cpp/robots_txt/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..73a209cbbe --- /dev/null +++ b/library/cpp/robots_txt/CMakeLists.linux-aarch64.txt @@ -0,0 +1,27 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(robotstxtcfg) + +add_library(library-cpp-robots_txt) +target_link_libraries(library-cpp-robots_txt PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-robots_txt-robotstxtcfg + library-cpp-case_insensitive_string + library-cpp-charset + cpp-string_utils-url + library-cpp-uri +) +target_sources(library-cpp-robots_txt PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree_rules_handler.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robots_txt_parser.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/rules_handler.cpp +) diff --git a/library/cpp/robots_txt/CMakeLists.linux-x86_64.txt b/library/cpp/robots_txt/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..73a209cbbe --- /dev/null +++ b/library/cpp/robots_txt/CMakeLists.linux-x86_64.txt @@ -0,0 +1,27 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(robotstxtcfg) + +add_library(library-cpp-robots_txt) +target_link_libraries(library-cpp-robots_txt PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-robots_txt-robotstxtcfg + library-cpp-case_insensitive_string + library-cpp-charset + cpp-string_utils-url + library-cpp-uri +) +target_sources(library-cpp-robots_txt PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree_rules_handler.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robots_txt_parser.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/rules_handler.cpp +) diff --git a/library/cpp/robots_txt/CMakeLists.txt b/library/cpp/robots_txt/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/robots_txt/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/robots_txt/CMakeLists.windows-x86_64.txt b/library/cpp/robots_txt/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..408bf12f04 --- /dev/null +++ b/library/cpp/robots_txt/CMakeLists.windows-x86_64.txt @@ -0,0 +1,26 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(robotstxtcfg) + +add_library(library-cpp-robots_txt) +target_link_libraries(library-cpp-robots_txt PUBLIC + contrib-libs-cxxsupp + yutil + cpp-robots_txt-robotstxtcfg + library-cpp-case_insensitive_string + library-cpp-charset + cpp-string_utils-url + library-cpp-uri +) +target_sources(library-cpp-robots_txt PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/prefix_tree_rules_handler.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robots_txt_parser.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/rules_handler.cpp +) diff --git a/library/cpp/robots_txt/constants.h b/library/cpp/robots_txt/constants.h new file mode 100644 index 0000000000..e5e2a57e18 --- /dev/null +++ b/library/cpp/robots_txt/constants.h @@ -0,0 +1,9 @@ +#pragma once + +#include <util/generic/size_literals.h> +#include <util/system/defaults.h> + + +constexpr auto robots_max = 500_KB; +constexpr auto max_rules_count = 10'000; +constexpr auto max_rule_length = 10_KB; diff --git a/library/cpp/robots_txt/prefix_tree.cpp b/library/cpp/robots_txt/prefix_tree.cpp new file mode 100644 index 0000000000..f7b1848a43 --- /dev/null +++ b/library/cpp/robots_txt/prefix_tree.cpp @@ -0,0 +1,172 @@ +#include <cstring> +#include <algorithm> + +#include "prefix_tree.h" + +TPrefixTreeNodeElement::TPrefixTreeNodeElement() + : Key(nullptr) + , KeyLen(0) + , Val(-1) + , Index(-1) +{ +} + +TPrefixTreeNodeElement::TPrefixTreeNodeElement(const char* key, i32 keyLen = 0, i32 val = -1, i32 index = -1) + : Key(key) + , KeyLen(keyLen) + , Val(val) + , Index(index) +{ +} + +TPrefixTreeNode::TPrefixTreeNode() + : Elements() +{ +} + +int TPrefixTreeNode::Find(char ch) const { + for (size_t i = 0; i < Elements.size(); ++i) + if (ch == *(Elements[i].Key)) + return i; + return -1; +} + +void TPrefixTreeNode::Set(const char* key, i32 keyLen, i32 val, i32 index) { + TPrefixTreeNodeElement element(key, keyLen, val, index); + int i = Find(*key); + if (i < 
0) + Elements.push_back(element); + else + Elements[i] = element; +} + +void TPrefixTreeNode::Dump(FILE* logFile) const { + if (!logFile) + logFile = stderr; + fprintf(logFile, "size=%" PRISZT "\n", Elements.size()); + static char b[1234]; + for (size_t i = 0; i < Elements.size(); ++i) { + strncpy(b, Elements[i].Key, Elements[i].KeyLen); + b[Elements[i].KeyLen] = 0; + fprintf(logFile, "{key=[%s]:%d, val=%d, index=%d}\n", b, Elements[i].KeyLen, Elements[i].Val, Elements[i].Index); + } +} + +void TPrefixTree::Dump(FILE* logFile) const { + if (!logFile) + logFile = stderr; + fprintf(logFile, "%" PRISZT " nodes\n", Nodes.size()); + for (size_t i = 0; i < Nodes.size(); ++i) { + fprintf(logFile, "%" PRISZT ": ", i); + Nodes[i].Dump(logFile); + fprintf(logFile, "\n"); + } +} + +TPrefixTree::TPrefixTree(int maxSize) { + Init(maxSize); +} + +void TPrefixTree::Init(int maxSize) { + Nodes.clear(); + Nodes.reserve(std::max(maxSize + 1, 1)); + Nodes.push_back(TPrefixTreeNode()); +} + +void TPrefixTree::Clear() { + Nodes.clear(); + Init(0); +} + +void TPrefixTree::Add(const char* s, i32 index) { + AddInternal(s, Nodes[0], index); +} + +void TPrefixTree::AddInternal(const char* s, TPrefixTreeNode& node, i32 index) { + if (!s || !*s) + return; + + int i = node.Find(*s); + if (i >= 0) { + TPrefixTreeNodeElement& d = node.Elements[i]; + const char* p = d.Key; + while (*s && (p - d.Key) < d.KeyLen && *s == *p) + ++s, ++p; + + if (*s) { + if ((p - d.Key) < d.KeyLen) { + Nodes.push_back(TPrefixTreeNode()); + Nodes.back().Set(p, d.KeyLen - (p - d.Key), d.Val, d.Index); + Nodes.back().Set(s, strlen(s), -1, index); + + d.Val = Nodes.size() - 1; + d.KeyLen = p - d.Key; + d.Index = INDEX_BOUND; + } else { + if (d.Val != -1 && index < d.Index) + AddInternal(s, Nodes[d.Val], index); + } + } else { + if ((p - d.Key) < d.KeyLen) { + Nodes.push_back(TPrefixTreeNode()); + Nodes.back().Set(p, d.KeyLen - (p - d.Key), d.Val, d.Index); + d.Val = Nodes.size() - 1; + d.KeyLen = p - d.Key; + d.Index = 
index; + } else { + d.Index = std::min(d.Index, index); + } + } + } else { + node.Set(s, strlen(s), -1, index); + } +} + +int TPrefixTree::GetMemorySize() const { + int res = Nodes.capacity() * sizeof(TPrefixTreeNode); + for (size_t i = 0; i < Nodes.size(); ++i) + res += Nodes[i].Elements.capacity() * sizeof(TPrefixTreeNodeElement); + return res; +} + +void TPrefixTree::Compress() { + Nodes.shrink_to_fit(); + for (size_t i = 0; i < Nodes.size(); ++i) + Nodes[i].Elements.shrink_to_fit(); +} + +i32 TPrefixTree::MinPrefixIndex(const char* s) const { + if (!*s) + return -1; + int i = Nodes[0].Find(*s); + if (i < 0) + return -1; + const TPrefixTreeNodeElement* d = &Nodes[0].Elements[i]; + + const char* p = d->Key; + if (!p || !*p) + return -1; + + i32 result = INDEX_BOUND; + i32 nodeIndex = 0; + while (*s == *p) { + if (++p - d->Key >= d->KeyLen) + result = std::min(result, d->Index); + if (!*++s) + break; + + if (p - d->Key >= d->KeyLen) { + nodeIndex = d->Val; + if (nodeIndex == -1) + break; + i = Nodes[nodeIndex].Find(*s); + if (i < 0) + break; + d = &Nodes[nodeIndex].Elements[i]; + p = d->Key; + if (!p || !*p) + break; + } + } + return result < INDEX_BOUND ? 
// Compressed prefix tree over C strings; each inserted string carries an
// integer index. Nodes[0] is the root. Elements keep raw pointers into the
// strings handed to Add (nothing is copied), so those strings presumably have
// to outlive the tree -- TODO confirm against callers.
class TPrefixTree : TNonCopyable {
private:
    // Sentinel larger than any real index: marks intermediate (split) elements
    // and serves as the "nothing found yet" value in MinPrefixIndex.
    static const i32 INDEX_BOUND = 1 << 30;

    // Flat node storage; an element's Val is a position in this vector, or -1.
    TVector<TPrefixTreeNode> Nodes;

public:
    // Drops all nodes, reserves room for max(maxSize + 1, 1) nodes and
    // creates an empty root.
    void Init(int);
    // Builds an empty tree by calling Init with the given size.
    TPrefixTree(int);

    // Inserts the string with the given index, starting from the root.
    // A null or empty string is ignored.
    void Add(const char*, i32);
    // Returns the smallest index recorded on the fully matched keys along the
    // path of the given string, or -1 when no key is matched completely.
    i32 MinPrefixIndex(const char*) const;
    // Removes everything and re-initializes the tree via Init(0).
    void Clear();
    // Prints every node to the given stream (stderr when null).
    void Dump(FILE*) const;
    // Capacity-based estimate, in bytes, of the node and element storage.
    int GetMemorySize() const;
    // Shrinks the node vector and every element vector to fit.
    void Compress();

private:
    // Recursive insertion helper working on one node.
    void AddInternal(const char*, TPrefixTreeNode&, i32);
};
result.append('*'); + } + + return result; +} + +// Prefix rules +bool IsPrefixRule(TStringBuf rule) { + return rule.EndsWith('*') && !TStringBuf(rule.begin(), rule.end() - 1).Contains('*'); +} + +// Converts rule to internal representation, i.e. +// For prefix rules: "/foo", 'D' -> 'D', "/foo" +// For generic rules: "/*foo", 'D' -> ("/*/*foo*", 'd') or ("/*foo$", 'A') -> ("/*foo", 'a') +// The distinction is in uppercase/lowercase rule type +std::pair<TString, char> ConvertRule(TStringBuf rule, char type) { + switch (type) { + case 'H': + case 'S': + case 'C': + case 'P': + return {TString(rule), type}; + case 'A': + case 'D': + break; + default: + return {{}, type}; + } + + auto result = NormalizeRule(rule); + if (IsPrefixRule(result)) { + result.pop_back(); // remove extra '*' from the end + } else { + type = tolower(type); + } + + return {std::move(result), type}; +} + +} // namespace + +TPrefixTreeRobotsTxtRulesHandler::TPrefixTreeRobotsTxtRulesHandler( + TBotIdSet supportedBotIds, + int robotsMaxSize, + int maxRulesNumber, + bool saveDataForAnyBot) + : TRobotsTxtRulesHandlerBase(supportedBotIds, robotsMaxSize, maxRulesNumber, saveDataForAnyBot) +{} + +TPrefixTreeRobotsTxtRulesHandler::TPrefixTreeRobotsTxtRulesHandler( + std::initializer_list<ui32> supportedBotIds, + int robotsMaxSize, + int maxRulesNumber, + bool saveDataForAnyBot) + : TRobotsTxtRulesHandlerBase(TBotIdSet(supportedBotIds), robotsMaxSize, maxRulesNumber, saveDataForAnyBot) +{} + +TPrefixTreeRobotsTxtRulesHandler::TPrefixTreeRobotsTxtRulesHandler( + const TSet<ui32>& supportedBotIds, + int robotsMaxSize, + int maxRulesNumber, + bool saveDataForAnyBot) + : TRobotsTxtRulesHandlerBase(supportedBotIds, robotsMaxSize, maxRulesNumber, saveDataForAnyBot) +{} + +bool TPrefixTreeRobotsTxtRulesHandler::Empty(const ui32 botId) const { + const auto& botInfo = BotIdToPrefixTreeBotInfo[GetNotOptimizedBotId(botId)]; + return !botInfo || (botInfo->BufferPosition <= sizeof(botInfo->BufferPosition)); +} + 
+TRobotsTxtRulesIterator TPrefixTreeRobotsTxtRulesHandler::GetRulesIterator(const ui32 botId) const { + const auto& botInfo = BotIdToPrefixTreeBotInfo[GetNotOptimizedBotId(botId)]; + if (!botInfo) { + return {}; + } + return TRobotsTxtRulesIterator(botInfo->Buffer.Get() + sizeof(botInfo->BufferPosition), botInfo->Buffer.Get() + botInfo->BufferPosition); +} + +size_t TPrefixTreeRobotsTxtRulesHandler::GetMemorySize() { + size_t allBotsSize = 0; + for (const auto& botInfo : BotIdToPrefixTreeBotInfo) { + if (!botInfo) { + continue; + } + + allBotsSize += botInfo->PrefixRules.GetMemorySize() + + botInfo->BufferSize * sizeof(char) + + botInfo->ComplexRulesSize * sizeof(char**) + + botInfo->RulesSize * sizeof(char*) + (1 << 8); + } + return allBotsSize; +} + +void TPrefixTreeRobotsTxtRulesHandler::ClearInternal(const ui32 botId) { + if (botId >= BotIdToPrefixTreeBotInfo.size()) { + return; + } + BotIdToPrefixTreeBotInfo[botId].Reset(); + TRobotsTxtRulesHandlerBase::ClearInternal(botId); +} + +bool TPrefixTreeRobotsTxtRulesHandler::OptimizeSize() { + ResetOptimized(); + + TMap<ui64, ui32> hashToBotId; + for (auto botId : LoadedBotIds) { + auto& botInfo = BotIdToPrefixTreeBotInfo[botId]; + if (botInfo->BufferPosition <= sizeof(ui32)) { + botInfo.Reset(); + LoadedBotIds.remove(botId); + continue; + } + + ui64 hash = FnvHash<ui64>(botInfo->Buffer.Get(), botInfo->BufferPosition); + if (auto p = hashToBotId.FindPtr(hash)) { + OptimizedBotIdToStoredBotId[botId] = *p; + ClearInternal(botId); + botInfo.Reset(); + } else { + hashToBotId[hash] = botId; + } + } + + if (IsFullTotal()) { + DoAllowAll(); + return false; + } + + return true; +} + +void TPrefixTreeRobotsTxtRulesHandler::Clear() { + for (size_t botId = 0; botId < robotstxtcfg::max_botid; ++botId) + if (IsBotIdSupported(botId)) + ClearInternal(botId); + TRobotsTxtRulesHandlerBase::Clear(); +} + +void TPrefixTreeRobotsTxtRulesHandler::ResizeBuffer(const ui32 botId, int newSize) { + auto& botInfo = GetInfo(botId); + 
TArrayHolder<char> newBuffer(new char[newSize]); + memcpy(newBuffer.Get(), botInfo.Buffer.Get(), std::min(botInfo.BufferSize, newSize)); + botInfo.Buffer.Swap(newBuffer); + botInfo.BufferSize = newSize; +} + +bool TPrefixTreeRobotsTxtRulesHandler::AddRule(const ui32 botId, TStringBuf rule, char type) { + if (rule.empty() || rule.Contains('\0')) { + return true; + } + + auto& botInfo = GetInfo(botId); + + if (IsFull(botId, rule.size())) { + DoAllowAll(); + return false; + } + + auto [convertedRule, convertedType] = ConvertRule(rule, type); + const auto len = convertedRule.size() + 2; // 1 byte for convertedType and another for '\0' + + if (auto newPos = botInfo.BufferPosition + len; newPos >= size_t(botInfo.BufferSize)) { + size_t newSize = botInfo.BufferSize; + while (newPos >= newSize) + newSize *= 2; + ResizeBuffer(botId, newSize); + } + + auto out = botInfo.Buffer.Get() + botInfo.BufferPosition; + *out++ = convertedType; + strcpy(out, convertedRule.data()); + botInfo.BufferPosition += len; + + if (type == 'A' || type == 'D') { + botInfo.RulesPosition++; + } + + return true; +} + +const char* TPrefixTreeRobotsTxtRulesHandler::GetRule(const ui32 botId, const char* s, char type) const { + const auto& botInfo = BotIdToPrefixTreeBotInfo[GetNotOptimizedBotId(botId)]; + if (!botInfo) { + return nullptr; + } + + int m = botInfo->RulesPosition + 1; + int k = botInfo->PrefixRules.MinPrefixIndex(s); + if (k >= 0) + m = k; + char* rule; + int j; + for (int i = 0; i < botInfo->ComplexRulesPosition; ++i) { + rule = *botInfo->ComplexRules.Get()[i]; + j = botInfo->ComplexRules.Get()[i] - botInfo->Rules.Get(); + if (j >= m) + break; + if (CheckRule(s, rule)) { + m = j; + break; + } + } + if (m >= botInfo->RulesPosition) + return nullptr; + return toupper(*(botInfo->Rules.Get()[m] - 1)) == type ? 
botInfo->Rules.Get()[m] : nullptr; +} + +inline bool TPrefixTreeRobotsTxtRulesHandler::IsAllowAll(const ui32 botId) const { + const auto id = GetMappedBotId(botId, false); + auto& botInfo = BotIdToPrefixTreeBotInfo[id ? *id : robotstxtcfg::id_anybot]; + return botInfo && botInfo->AllowAll; +} + +inline bool TPrefixTreeRobotsTxtRulesHandler::IsAllowAll() const { + for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) + if (robotstxtcfg::IsYandexBotId(botId) && IsBotIdSupported(botId) && !IsAllowAll(botId)) { + return false; + } + + return true; +} + +inline bool TPrefixTreeRobotsTxtRulesHandler::IsDisallowAll(const ui32 botId, bool useAny) const { + const auto id = GetMappedBotId(botId, false); + if (id) { + const auto& botInfo = BotIdToPrefixTreeBotInfo[*id]; + return botInfo && botInfo->DisallowAll; + } + + auto& botInfo = BotIdToPrefixTreeBotInfo[robotstxtcfg::id_anybot]; + return useAny && botInfo && botInfo->DisallowAll; +} + +inline bool TPrefixTreeRobotsTxtRulesHandler::IsDisallowAll() const { + for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) + if (robotstxtcfg::IsYandexBotId(botId) && IsBotIdSupported(botId) && !IsDisallowAll(botId)) + return false; + + return true; +} + +void TPrefixTreeRobotsTxtRulesHandler::DoAllowAll() { + using robotstxtcfg::id_anybot; + + // Drop all bots to default + SupportedBotIds.insert(id_anybot); + for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) { + if (IsBotIdSupported(botId)) { + ClearInternal(botId); + OptimizedBotIdToStoredBotId[botId] = id_anybot; + LoadedBotIds.insert(botId); + } + } + + // Initialize anybot with "allow all" rule + AddRule(id_anybot, "/", 'A'); + GetInfo(id_anybot).AllowAll = true; + SaveRulesToBuffer(); +} + +void TPrefixTreeRobotsTxtRulesHandler::DoDisallowAll() { + for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) { + if (!IsBotIdSupported(botId)) + continue; + ClearInternal(botId); + if (botId == robotstxtcfg::id_anybot) { + auto& botInfo = 
GetInfo(botId); + AddRule(botId, "/", 'D'); + botInfo.DisallowAll = true; + SaveRulesToBuffer(); + } else { + OptimizedBotIdToStoredBotId[botId] = robotstxtcfg::id_anybot; + } + LoadedBotIds.insert(botId); + } +} + +const char* TPrefixTreeRobotsTxtRulesHandler::IsDisallow(const ui32 botId, const char* s, bool useAny) const { + const auto id = GetMappedBotId(botId, useAny); + if (!id) + return nullptr; + + const auto& botInfo = BotIdToPrefixTreeBotInfo[*id]; + if (botInfo && IsDisallowAll(*id, useAny)) { + int index = (const_cast<TPrefixTreeRobotsTxtRulesHandler*>(this))->FindRuleAll(*botInfo, 'D'); + if (index < 0) { //o_O + return botInfo->Rules.Get()[0]; + } else { + return botInfo->Rules.Get()[index]; + } + } + + return GetRule(*id, s, 'D'); +} + +const char* TPrefixTreeRobotsTxtRulesHandler::IsAllow(const ui32 botId, const char* s) const { + const auto id = GetMappedBotId(botId, true); + if (auto p = GetRule(*id, s, 'A')) + return p; + return GetRule(*id, s, 'D') ? nullptr : "/"; +} + +int TPrefixTreeRobotsTxtRulesHandler::StrLenWithoutStars(const char* s) { + int len = 0; + + for (size_t index = 0; s[index]; ++index) { + if (s[index] != '*') { + ++len; + } + } + + return len; +} + +int TPrefixTreeRobotsTxtRulesHandler::TraceBuffer(const ui32 botId, int countRules, const TArrayHolder<TRuleInfo>* ruleInfos) { + CheckBotIdValidity(botId); + auto& prefixBotInfo = GetInfo(botId); + TBotInfo& botInfo = BotIdToInfo[botId]; + + bool store = countRules >= 0; + if (store) { + prefixBotInfo.Rules.Reset(new char*[prefixBotInfo.RulesSize = countRules]); + } + + int beg = -1, n = 0; + *((int*)prefixBotInfo.Buffer.Get()) = prefixBotInfo.BufferSize; + for (size_t i = sizeof(prefixBotInfo.BufferPosition); i < prefixBotInfo.BufferPosition; ++i) + if (prefixBotInfo.Buffer.Get()[i] == '\n' || prefixBotInfo.Buffer.Get()[i] == 0) { + if (beg < 0 || beg + 1 == (int)i) + continue; + + char* s = prefixBotInfo.Buffer.Get() + beg; + if (store) { + switch (*s) { + case 'H': + 
HostDirective = s + 1; + break; + case 'S': + SiteMaps.insert(s + 1); + break; + case 'C': + ParseCrawlDelay(s + 1, botInfo.CrawlDelay); + break; + case 'P': + CleanParams.insert(s + 1); + break; + default: + prefixBotInfo.Rules.Get()[n] = s + 1; + (*ruleInfos).Get()[n].Len = StrLenWithoutStars(s + 1); + (*ruleInfos).Get()[n].Allow = toupper(*s) == 'A'; + + prefixBotInfo.HasAllow |= toupper(*s) == 'A'; + prefixBotInfo.HasDisallow |= toupper(*s) == 'D'; + break; + } + } + n += (*s != 'H' && *s != 'S' && *s != 'C' && *s != 'P'); + beg = -1; + } else if (beg < 0) + beg = i; + + return n; +} + +int TPrefixTreeRobotsTxtRulesHandler::FindRuleAll(const TPrefixTreeBotInfo& prefixBotInfo, const char neededType) { + static const char* all[] = {"*", "/", "*/", "/*", "*/*"}; + for (int ruleNumber = prefixBotInfo.RulesSize - 1; ruleNumber >= 0; --ruleNumber) { + const char* curRule = prefixBotInfo.Rules.Get()[ruleNumber]; + char ruleType = *(curRule - 1); + + if (strlen(curRule) > 3) + break; + if (neededType != ruleType) + continue; + + for (size_t i = 0; i < sizeof(all) / sizeof(char*); ++i) + if (strcmp(all[i], curRule) == 0) + return ruleNumber; + } + return -1; +} + +bool TPrefixTreeRobotsTxtRulesHandler::HasDisallowRulePrevAllowAll(const TPrefixTreeBotInfo& prefixBotInfo, int ruleAllAllow) { + for (int ruleNumber = ruleAllAllow - 1; ruleNumber >= 0; --ruleNumber) { + const char* curRule = prefixBotInfo.Rules.Get()[ruleNumber]; + char ruleType = *(curRule - 1); + if (tolower(ruleType) == 'd') + return true; + } + return false; +} + +bool TPrefixTreeRobotsTxtRulesHandler::CheckAllowDisallowAll(const ui32 botId, const bool checkDisallow) { + CheckBotIdValidity(botId); + + auto& botInfo = GetInfo(botId); + + if (botInfo.RulesSize == 0) + return !checkDisallow; + if (botInfo.RulesPosition <= 0) + return 0; + + if (checkDisallow) + return !botInfo.HasAllow && FindRuleAll(botInfo, 'D') >= 0; + int ruleAllAllow = FindRuleAll(botInfo, 'A'); + if (ruleAllAllow == -1) + return 
!botInfo.HasDisallow; + return !HasDisallowRulePrevAllowAll(botInfo, ruleAllAllow); +} + +void TPrefixTreeRobotsTxtRulesHandler::SortRules( + TPrefixTreeBotInfo& prefixBotInfo, + size_t count, + const TArrayHolder<TRuleInfo>* ruleInfos) { + TVector<size_t> indexes(count); + for (size_t index = 0; index < count; ++index) + indexes[index] = index; + + TRulesSortFunc sortFunc(ruleInfos); + std::sort(indexes.begin(), indexes.end(), sortFunc); + + TArrayHolder<char*> workingCopy; + workingCopy.Reset(new char*[count]); + + for (size_t index = 0; index < count; ++index) + workingCopy.Get()[index] = prefixBotInfo.Rules.Get()[index]; + for (size_t index = 0; index < count; ++index) + prefixBotInfo.Rules.Get()[index] = workingCopy.Get()[indexes[index]]; +} + +void TPrefixTreeRobotsTxtRulesHandler::SaveRulesToBuffer() { + // as sitemaps, clean-params and HostDirective from prefix tree was deleted + for (const auto& sitemap: SiteMaps) + AddRule(robotstxtcfg::id_anybot, sitemap, 'S'); + for (const auto& param : CleanParams) + AddRule(robotstxtcfg::id_anybot, param, 'P'); + if (!HostDirective.empty()) + AddRule(robotstxtcfg::id_anybot, HostDirective, 'H'); +} + +void TPrefixTreeRobotsTxtRulesHandler::SaveRulesFromBuffer(const ui32 botId) { + CheckBotIdValidity(botId); + + auto& botInfo = GetInfo(botId); + + TArrayHolder<TRuleInfo> ruleInfos; + + int n = TraceBuffer(botId, -1, nullptr), countPrefix = 0; + ruleInfos.Reset(new TRuleInfo[n]); + botInfo.RulesPosition = TraceBuffer(botId, n, &ruleInfos); + assert(botInfo.RulesPosition == n); + + SortRules(botInfo, n, &ruleInfos); + + botInfo.DisallowAll = CheckAllowDisallowAll(botId, true); + botInfo.AllowAll = CheckAllowDisallowAll(botId, false); + + for (int i = 0; i < n; ++i) + countPrefix += !!isupper(*(botInfo.Rules.Get()[i] - 1)); + + botInfo.PrefixRules.Init(countPrefix); + botInfo.ComplexRules.Reset(new char**[botInfo.ComplexRulesSize = n - countPrefix]); + botInfo.ComplexRulesPosition = 0; + + for (int i = 0; i < n; ++i) { + 
char* s = botInfo.Rules.Get()[i]; + if (isupper(*(s - 1))) + botInfo.PrefixRules.Add(s, i); + else + botInfo.ComplexRules.Get()[botInfo.ComplexRulesPosition++] = &botInfo.Rules.Get()[i]; + } + botInfo.PrefixRules.Compress(); +} + +void TPrefixTreeRobotsTxtRulesHandler::AfterParse(const ui32 botId) { + CheckBotIdValidity(botId); + + auto& botInfo = GetInfo(botId); + + ResizeBuffer(botId, botInfo.BufferPosition); + SaveRulesFromBuffer(botId); + + if (botInfo.RulesPosition == 0) { + AddRule(botId, "/", 'A'); + } +} + +TPrefixTreeRobotsTxtRulesHandler::TPrefixTreeBotInfo& TPrefixTreeRobotsTxtRulesHandler::GetInfo(ui32 botId) { + Y_ENSURE(botId < robotstxtcfg::max_botid); + auto& res = BotIdToPrefixTreeBotInfo[botId]; + if (!res) { + res = MakeHolder<TPrefixTreeBotInfo>(); + } + return *res; +} + +bool TPrefixTreeRobotsTxtRulesHandler::CheckRule(const char* s, const char* rule) { + const char* r = rule; + const char* s_end = s + strlen(s); + const char* r_end = r + strlen(r); + // assert( r && !strstr(r, "**") ); + for (; *s; ++s) { + if ((s_end - s + 1) * 2 < (r_end - r)) + return 0; + while (*r == '*') + ++r; + + if (*s == *r) { + ++r; + } else { + while (r != rule && *r != '*') + --r; + + if (*r != '*') + return 0; + if (*r == '*') + ++r; + if (*r == *s) + ++r; + } + } + return !*r || (!*(r + 1) && *r == '*'); +} + +bool TPrefixTreeRobotsTxtRulesHandler::IsFull(ui32 botId, size_t length) const { + Y_ENSURE(botId < robotstxtcfg::max_botid); + const auto& botInfo = BotIdToPrefixTreeBotInfo[botId]; + if (!botInfo) { + return false; + } + + return (size_t(botInfo->RulesPosition) >= MaxRulesNumber) || (botInfo->BufferPosition + length + 300 > size_t(RobotsMaxSize)); +} + +bool TPrefixTreeRobotsTxtRulesHandler::IsFullTotal() const { + size_t allBotsRulesCount = 0; + size_t allBotsBufferSize = 0; + + for (const auto& botInfo : BotIdToPrefixTreeBotInfo) { + if (botInfo) { + allBotsRulesCount += botInfo->RulesPosition; + allBotsBufferSize += botInfo->BufferPosition; + } + } + 
+ return (allBotsRulesCount >= MaxRulesNumber) || (allBotsBufferSize + 300 > size_t(RobotsMaxSize)); +} + +size_t TPrefixTreeRobotsTxtRulesHandler::GetPacked(const char*& data) const { + Y_STATIC_THREAD(TBuffer) + packedRepresentation; + + // calculate size, needed for packed data + size_t totalPackedSize = sizeof(ui32); // num of botids + ui32 numOfSupportedBots = 0; + + for (size_t botId = 0; botId < robotstxtcfg::max_botid; ++botId) { + if (!IsBotIdSupported(botId)) { + continue; + } + + const auto& botInfo = BotIdToPrefixTreeBotInfo[GetNotOptimizedBotId(botId)]; + // botId + packedDataSize + packedData + totalPackedSize += sizeof(ui32) + (botInfo ? botInfo->BufferPosition : sizeof(ui32)); + ++numOfSupportedBots; + } + + ((TBuffer&)packedRepresentation).Reserve(totalPackedSize); + + // fill packed data + char* packedPtr = ((TBuffer&)packedRepresentation).Data(); + + *((ui32*)packedPtr) = numOfSupportedBots; + packedPtr += sizeof(ui32); + + for (size_t botId = 0; botId < robotstxtcfg::max_botid; ++botId) { + if (!IsBotIdSupported(botId)) { + continue; + } + + const auto& botInfo = BotIdToPrefixTreeBotInfo[GetNotOptimizedBotId(botId)]; + memcpy(packedPtr, &botId, sizeof(ui32)); + packedPtr += sizeof(ui32); + + if (botInfo) { + *((ui32*)botInfo->Buffer.Get()) = botInfo->BufferPosition; + memcpy(packedPtr, botInfo->Buffer.Get(), botInfo->BufferPosition); + packedPtr += botInfo->BufferPosition; + } else { + // In absense of bot info we serialize only size of its buffer, which is 4 because it takes 4 bytes + ui32 emptyBufferPosition = sizeof(ui32); + memcpy(packedPtr, &emptyBufferPosition, sizeof(ui32)); + packedPtr += sizeof(ui32); + } + } + + data = ((TBuffer&)packedRepresentation).Data(); + return totalPackedSize; +} + +void TPrefixTreeRobotsTxtRulesHandler::LoadPacked(const char* botsData, const char* botsDataEnd) { + Clear(); + + if (Y_UNLIKELY(botsDataEnd != nullptr && botsData >= botsDataEnd)) { + ythrow yexception() << "Buffer overflow"; + } + + ui32 numOfBots 
= *((ui32*)botsData); + botsData += sizeof(ui32); + + for (ui32 botIndex = 0; botIndex < numOfBots; ++botIndex) { + if (Y_UNLIKELY(botsDataEnd != nullptr && botsData >= botsDataEnd)) { + ythrow yexception() << "Buffer overflow"; + } + + ui32 botId = 0; + memcpy(&botId, botsData, sizeof(ui32)); + botsData += sizeof(ui32); + + // skip bot id's, that not supported for now + if (botId >= robotstxtcfg::max_botid || !IsBotIdSupported(botId)) { + if (Y_UNLIKELY(botsDataEnd != nullptr && botsData >= botsDataEnd)) { + ythrow yexception() << "Buffer overflow"; + } + + ui32 oneBotPackedSize = 0; + memcpy(&oneBotPackedSize, botsData, sizeof(ui32)); + botsData += oneBotPackedSize; + + continue; + } + + //SupportedBotIds.insert(botId); + + auto& botInfo = GetInfo(botId); + + if (Y_UNLIKELY(botsDataEnd != nullptr && botsData >= botsDataEnd)) { + ythrow yexception() << "Buffer overflow"; + } + + static_assert(sizeof(botInfo.BufferSize) == sizeof(ui32), "BufferSize must be 4 bytes"); + static_assert(sizeof(botInfo.BufferPosition) == sizeof(ui32), "BufferPosition must be 4 bytes"); + + memcpy(&botInfo.BufferSize, botsData, sizeof(ui32)); + memcpy(&botInfo.BufferPosition, botsData, sizeof(ui32)); + + if (Y_UNLIKELY(botsDataEnd != nullptr && (botsData + botInfo.BufferSize) > botsDataEnd)) { + ythrow yexception() << "Buffer overflow"; + } + + botInfo.Buffer.Reset(new char[botInfo.BufferSize]); + memcpy(botInfo.Buffer.Get(), botsData, botInfo.BufferSize); + SaveRulesFromBuffer(botId); + + if (botInfo.BufferSize > (int)sizeof(ui32)) { // empty data for robots means, that we don't have section for this bot + LoadedBotIds.insert(botId); + } + + botsData += botInfo.BufferSize; + } + + OptimizeSize(); +} + +void TPrefixTreeRobotsTxtRulesHandler::Dump(const ui32 botId, FILE* dumpFile) { + if (!dumpFile) + dumpFile = stderr; + fprintf(dumpFile, "User-Agent: %s\n", robotstxtcfg::GetFullName(botId).data()); + for (TRobotsTxtRulesIterator it = GetRulesIterator(botId); it.HasRule(); it.Next()) + 
fprintf(dumpFile, "%s: %s\n", DirTypeToName(it.GetRuleType()), it.GetInitialRule().data()); +} + +void TPrefixTreeRobotsTxtRulesHandler::Dump(const ui32 botId, IOutputStream& out) { + out << "User-Agent: " << robotstxtcfg::GetFullName(botId) << Endl; + for (TRobotsTxtRulesIterator it = GetRulesIterator(botId); it.HasRule(); it.Next()) + out << DirTypeToName(it.GetRuleType()) << ": " << it.GetInitialRule() << Endl; +} diff --git a/library/cpp/robots_txt/robots_txt.h b/library/cpp/robots_txt/robots_txt.h new file mode 100644 index 0000000000..5ee48fb14f --- /dev/null +++ b/library/cpp/robots_txt/robots_txt.h @@ -0,0 +1,605 @@ +#pragma once + +#include "constants.h" +#include "robots_txt_parser.h" +#include "prefix_tree.h" +#include "robotstxtcfg.h" + +#include <util/generic/noncopyable.h> +#include <util/generic/map.h> +#include <util/generic/maybe.h> +#include <util/generic/ptr.h> +#include <util/generic/set.h> + +#include <array> +#include <utility> + + +enum EDirectiveType { + USER_AGENT = 1, + DISALLOW = 2, + ALLOW = 3, + HOST = 4, + SITEMAP = 5, + CRAWL_DELAY = 6, + CLEAN_PARAM = 7, + UNKNOWN = 9, +}; + +enum EFormatErrorType { + ERROR_RULE_NOT_SLASH = 1, + ERROR_ASTERISK_MULTI = 2, + ERROR_HOST_MULTI = 3, + ERROR_ROBOTS_HUGE = 4, + ERROR_RULE_BEFORE_USER_AGENT = 5, + ERROR_RULE_HUGE = 6, + ERROR_HOST_FORMAT = 7, + ERROR_TRASH = 8, + ERROR_SITEMAP_FORMAT = 9, + ERROR_CRAWL_DELAY_FORMAT = 10, + ERROR_CRAWL_DELAY_MULTI = 11, + ERROR_CLEAN_PARAM_FORMAT = 12, + + WARNING_EMPTY_RULE = 30, + WARNING_SUSPECT_SYMBOL = 31, + WARNING_UNKNOWN_FIELD = 33, + WARNING_UPPER_REGISTER = 34, + WARNING_SITEMAP = 35, +}; + +class TRobotsTxtRulesIterator { +private: + const char* Begin = nullptr; + const char* End = nullptr; + +public: + TRobotsTxtRulesIterator() = default; + TRobotsTxtRulesIterator(const char* begin, const char* end); + void Next(); + bool HasRule() const; + const char* GetRule() const; + TString GetInitialRule() const; // unlike GetRule(), it neither omits 
trailing '$' nor adds redundant '*' + EDirectiveType GetRuleType() const; + + static EDirectiveType CharToDirType(char ch); +}; + +class TRobotsTxtRulesHandlerBase { +public: + typedef TVector<std::pair<EFormatErrorType, int>> TErrorVector; + + TRobotsTxtRulesHandlerBase( + TBotIdSet supportedBotIds, + int robotsMaxSize, + int maxRulesNumber, + bool saveDataForAnyBot); + + TRobotsTxtRulesHandlerBase( + const TSet<ui32>& supportedBotIds, + int robotsMaxSize, + int maxRulesNumber, + bool saveDataForAnyBot); + + virtual ~TRobotsTxtRulesHandlerBase(); + + int GetCrawlDelay(ui32 botId, bool* realInfo = nullptr) const; + int GetMinCrawlDelay(int defaultCrawlDelay = -1) const; + bool IsHandlingErrors() const; + const TString& GetHostDirective() const; + const TVector<TString> GetSiteMaps() const; + const TVector<TString> GetCleanParams() const; + const TErrorVector& GetErrors() const; + TVector<int> GetAcceptedLines(ui32 botId = robotstxtcfg::id_yandexbot) const; + + template <class THostHandler> + static int ParseRules(TRobotsTxtParser& parser, TRobotsTxtRulesHandlerBase* rulesHandler, THostHandler* hostHandler, const char* host = nullptr); + static inline void ClearAllExceptCrossSection(TRobotsTxtParser& parser, TRobotsTxtRulesHandlerBase* rulesHandler, ui32 botId); + static int CheckHost(const char* host); + static int CheckSitemapUrl(const char* url, const char* host, TString& modifiedUrl); + static int CheckRule(const char* value, int line, TRobotsTxtRulesHandlerBase* rulesHandler); + static int CheckAndNormCleanParam(TString& s); + static int ParseCrawlDelay(const char* value, int& crawlDelay); + static EDirectiveType NameToDirType(const char* d); + static const char* DirTypeToName(EDirectiveType t); + + void SetErrorsHandling(bool handleErrors); + void SetHostDirective(const char* hostDirective); + void SetCrawlDelay(ui32 botId, int crawlDelay); + void AddAcceptedLine(ui32 line, const TBotIdSet& botIds, bool isCrossSection); + void AddSiteMap(const char* sitemap); 
+ void AddCleanParam(const char* cleanParam); + bool AddRuleWithErrorCheck(ui32 botId, TStringBuf rule, char type, TRobotsTxtParser& parser); + int OnHost(ui32 botId, TRobotsTxtParser& parser, const char* value, TRobotsTxtRulesHandlerBase*& rulesHandler); + + virtual void Clear(); + virtual bool IsAllowAll(ui32 botId) const = 0; + virtual bool IsAllowAll() const = 0; + virtual bool IsDisallowAll(ui32 botId, bool useAny = true) const = 0; + virtual bool IsDisallowAll() const = 0; + virtual const char* IsDisallow(ui32 botId, const char* s, bool useAny = true) const = 0; + virtual const char* IsAllow(ui32 botId, const char* s) const = 0; + virtual TRobotsTxtRulesIterator GetRulesIterator(ui32 botId) const = 0; + virtual void Dump(ui32 botId, FILE* logFile) = 0; + virtual void Dump(ui32 botId, IOutputStream& out) = 0; + virtual bool Empty(ui32 botId) const = 0; + virtual void LoadPacked(const char* botsData, const char* botsDataEnd = nullptr) = 0; + virtual size_t GetPacked(const char*& data) const = 0; + virtual void AfterParse(ui32 botId) = 0; + virtual void DoAllowAll() = 0; + virtual void DoDisallowAll() = 0; + bool IsBotIdLoaded(ui32 botId) const; + bool IsBotIdSupported(ui32 botId) const; + ui32 GetNotOptimizedBotId(ui32 botId) const; + TMaybe<ui32> GetMappedBotId(ui32 botId, bool useAny = true) const; + +protected: + void CheckBotIdValidity(ui32 botId) const; + virtual bool OptimizeSize() = 0; + +private: + bool HandleErrors; + +protected: + struct TBotInfo { + int CrawlDelay; + + TBotInfo() + : CrawlDelay(-1) + { + } + }; + + TBotIdSet LoadedBotIds; + TSet<TString> SiteMaps; + TSet<TString> CleanParams; + TString HostDirective; + TErrorVector Errors; + typedef std::pair<ui32, ui32> TBotIdAcceptedLine; + TVector<TBotIdAcceptedLine> AcceptedLines; + TVector<ui32> CrossSectionAcceptedLines; + + TVector<TBotInfo> BotIdToInfo; + int CrawlDelay; + size_t RobotsMaxSize; + size_t MaxRulesNumber; + bool SaveDataForAnyBot; + + TBotIdSet SupportedBotIds; + std::array<ui8, 
robotstxtcfg::max_botid> OptimizedBotIdToStoredBotId; + + virtual bool IsFull(ui32 botId, size_t length) const = 0; + virtual bool IsFullTotal() const = 0; + virtual bool AddRule(ui32 botId, TStringBuf rule, char type) = 0; + //parts of ParseRules + inline static void CheckRobotsLines(TRobotsTxtRulesHandlerBase* rulesHandler, TVector<int>& nonRobotsLines); + inline static void CheckAsterisk(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, ui32 lineNumber, bool& wasAsterisk); + inline static bool CheckWasUserAgent(TRobotsTxtRulesHandlerBase* rulesHandler, bool wasUserAgent, bool& ruleBeforeUserAgent, bool& wasRule, ui32 lineNumber); + inline static bool CheckRuleNotSlash(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, ui32 lineNumber); + inline static bool CheckSupportedBots(const TBotIdSet& currentBotIds, TBotIdSet& wasRuleForBot, const TBotIdSet& isSupportedBot); + inline static bool CheckEmptyRule(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, EDirectiveType& type, ui32 lineNumber); + inline static bool ProcessSitemap(TRobotsTxtRulesHandlerBase* rulesHandler, TRobotsTxtParser& parser, const char* value, const char* host); + inline static bool ProcessCleanParam(TRobotsTxtRulesHandlerBase* rulesHandler, TRobotsTxtParser& parser, TString& value); + inline static bool AddRules( + TRobotsTxtRulesHandlerBase* rulesHandler, + TRobotsTxtParser& parser, + const char* value, + char type, + const TBotIdSet& currentBotIds, + const TBotIdSet& isSupportedBot); + + inline static bool ProcessCrawlDelay( + TRobotsTxtRulesHandlerBase* rulesHandler, + TRobotsTxtParser& parser, + const TBotIdSet& currentBotIds, + const TBotIdSet& isSupportedBot, + const char* value); + + inline static void ProcessUserAgent( + TRobotsTxtRulesHandlerBase* rulesHandler, + TRobotsTxtParser& parser, + const TBotIdSet& currentBotIds, + TBotIdSet& wasRuleForBot, + TBotIdSet& isSupportedBot, + TVector<ui32>& botIdToMaxAppropriateUserAgentNameLength, + const char* 
value); + + bool CheckRobot( + const char* userAgent, + TBotIdSet& botIds, + const TVector<ui32>* botIdToMaxAppropriateUserAgentNameLength = nullptr) const; + + virtual void ClearInternal(ui32 botId); + + void AddError(EFormatErrorType type, int line); + + void ResetOptimized() noexcept; +}; + +class TPrefixTreeRobotsTxtRulesHandler: public TRobotsTxtRulesHandlerBase, TNonCopyable { +private: + static const int INIT_BUFFER_SIZE = 1 << 6; + + struct TRuleInfo { + size_t Len; + bool Allow; + }; + + bool IsFull(ui32 botId, size_t length) const override; + bool IsFullTotal() const override; + bool AddRule(ui32 botId, TStringBuf rule, char type) override; + const char* GetRule(ui32 botId, const char* s, char type) const; + void ResizeBuffer(ui32 botId, int newSize); + void SaveRulesFromBuffer(ui32 botId); + int TraceBuffer(ui32 botId, int countRules, const TArrayHolder<TRuleInfo>* ruleInfos); + bool CheckAllowDisallowAll(ui32 botId, bool checkDisallow); + void SaveRulesToBuffer(); + int StrLenWithoutStars(const char* s); + +protected: + class TRulesSortFunc { + private: + const TArrayHolder<TRuleInfo>* RuleInfos; + + public: + TRulesSortFunc(const TArrayHolder<TRuleInfo>* ruleInfos) + : RuleInfos(ruleInfos) + { + } + bool operator()(const size_t& lhs, const size_t& rhs) { + const TRuleInfo& left = (*RuleInfos).Get()[lhs]; + const TRuleInfo& right = (*RuleInfos).Get()[rhs]; + return (left.Len == right.Len) ? 
left.Allow && !right.Allow : left.Len > right.Len; + } + }; + + struct TPrefixTreeBotInfo { + bool DisallowAll = false; + bool AllowAll = false; + bool HasDisallow = false; + bool HasAllow = false; + + TArrayHolder<char> Buffer{new char[INIT_BUFFER_SIZE]}; + ui32 BufferPosition = sizeof(BufferPosition); + int BufferSize = INIT_BUFFER_SIZE; + + TArrayHolder<char*> Rules = nullptr; + int RulesPosition = 0; + int RulesSize = 0; + + TArrayHolder<char**> ComplexRules = nullptr; + int ComplexRulesPosition = 0; + int ComplexRulesSize = 0; + + TPrefixTree PrefixRules {0}; + }; + + std::array<THolder<TPrefixTreeBotInfo>, robotstxtcfg::max_botid> BotIdToPrefixTreeBotInfo; + + TPrefixTreeBotInfo& GetInfo(ui32 botId); + static bool CheckRule(const char* s, const char* rule); + void ClearInternal(ui32 botId) override; + bool OptimizeSize() override; + +private: + void SortRules(TPrefixTreeBotInfo& prefixBotInfo, size_t count, const TArrayHolder<TRuleInfo>* ruleInfos); + bool HasDisallowRulePrevAllowAll(const TPrefixTreeBotInfo& prefixBotInfo, int ruleAllAllow); + int FindRuleAll(const TPrefixTreeBotInfo& prefixBotInfo, char neededType); + +public: + TPrefixTreeRobotsTxtRulesHandler( + TBotIdSet supportedBotIds = robotstxtcfg::defaultSupportedBotIds, + int robotsMaxSize = robots_max, + int maxRulesCount = -1, + bool saveDataForAnyBot = true); + + TPrefixTreeRobotsTxtRulesHandler( + std::initializer_list<ui32> supportedBotIds, + int robotsMaxSize = robots_max, + int maxRulesCount = -1, + bool saveDataForAnyBot = true); + + TPrefixTreeRobotsTxtRulesHandler( + const TSet<ui32>& supportedBotIds, + int robotsMaxSize = robots_max, + int maxRulesCount = -1, + bool saveDataForAnyBot = true); + + void Clear() override; + void AfterParse(ui32 botId) override; + bool IsAllowAll(ui32 botId) const override; + bool IsAllowAll() const override; + bool IsDisallowAll(ui32 botId, bool useAny = true) const override; + bool IsDisallowAll() const override; + const char* IsDisallow(ui32 botId, const 
char* s, bool useAny = true) const override; + const char* IsAllow(ui32 botId, const char* s) const override; + TRobotsTxtRulesIterator GetRulesIterator(ui32 botId) const override; + void DoAllowAll() override; + void DoDisallowAll() override; + bool Empty(ui32 botId) const override; + + void LoadPacked(const char* botsData, const char* botsDataEnd = nullptr) override; + size_t GetPacked(const char*& data) const override; + void Dump(ui32 botId, FILE* logFile) override; + void Dump(ui32 botId, IOutputStream& out) override; + size_t GetMemorySize(); +}; + +using TRobotsTxt = TPrefixTreeRobotsTxtRulesHandler; + +void TRobotsTxtRulesHandlerBase::ClearAllExceptCrossSection(TRobotsTxtParser& parser, TRobotsTxtRulesHandlerBase* rulesHandler, ui32 botId) { + rulesHandler->ClearInternal(botId); + if (botId == robotstxtcfg::id_anybot) { + // as sitemaps, clean-params and HostDirective from prefix tree was deleted + for (const auto& sitemap : rulesHandler->SiteMaps) { + rulesHandler->AddRuleWithErrorCheck(robotstxtcfg::id_anybot, sitemap, 'S', parser); + } + for (const auto& param : rulesHandler->CleanParams) { + rulesHandler->AddRuleWithErrorCheck(robotstxtcfg::id_anybot, param, 'P', parser); + } + if (!rulesHandler->HostDirective.empty()) { + rulesHandler->AddRuleWithErrorCheck(robotstxtcfg::id_anybot, rulesHandler->HostDirective, 'H', parser); + } + } +} + +void TRobotsTxtRulesHandlerBase::CheckRobotsLines(TRobotsTxtRulesHandlerBase* rulesHandler, TVector<int>& nonRobotsLines) { + if (rulesHandler->IsHandlingErrors()) { + for (size_t i = 0; i < nonRobotsLines.size(); ++i) + rulesHandler->AddError(ERROR_TRASH, nonRobotsLines[i]); + nonRobotsLines.clear(); + } +} + +void TRobotsTxtRulesHandlerBase::CheckAsterisk(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, ui32 lineNumber, bool& wasAsterisk) { + if (strcmp(value, "*") == 0) { + if (wasAsterisk) + rulesHandler->AddError(ERROR_ASTERISK_MULTI, lineNumber); + wasAsterisk = true; + } +} + +bool 
TRobotsTxtRulesHandlerBase::CheckWasUserAgent(TRobotsTxtRulesHandlerBase* rulesHandler, bool wasUserAgent, bool& ruleBeforeUserAgent, bool& wasRule, ui32 lineNumber) { + if (wasUserAgent) { + wasRule = true; + return false; + } + if (!ruleBeforeUserAgent) { + ruleBeforeUserAgent = true; + rulesHandler->AddError(ERROR_RULE_BEFORE_USER_AGENT, lineNumber); + } + return true; +} + +bool TRobotsTxtRulesHandlerBase::CheckRuleNotSlash(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, ui32 lineNumber) { + if (*value && *value != '/' && *value != '*') { + rulesHandler->AddError(ERROR_RULE_NOT_SLASH, lineNumber); + return true; + } + return false; +} + +bool TRobotsTxtRulesHandlerBase::CheckSupportedBots( + const TBotIdSet& currentBotIds, + TBotIdSet& wasRuleForBot, + const TBotIdSet& isSupportedBot) +{ + bool hasAtLeastOneSupportedBot = false; + for (ui32 currentBotId : currentBotIds) { + wasRuleForBot.insert(currentBotId); + hasAtLeastOneSupportedBot = hasAtLeastOneSupportedBot || isSupportedBot.contains(currentBotId); + } + return hasAtLeastOneSupportedBot; +} + +bool TRobotsTxtRulesHandlerBase::CheckEmptyRule(TRobotsTxtRulesHandlerBase* rulesHandler, const char* value, EDirectiveType& type, ui32 lineNumber) { + if (value && strlen(value) == 0) { + rulesHandler->AddError(WARNING_EMPTY_RULE, lineNumber); + type = type == ALLOW ? 
DISALLOW : ALLOW; + return true; + } + return false; +} + +bool TRobotsTxtRulesHandlerBase::AddRules( + TRobotsTxtRulesHandlerBase* rulesHandler, + TRobotsTxtParser& parser, + const char* value, + char type, + const TBotIdSet& currentBotIds, + const TBotIdSet& isSupportedBot) +{ + for (ui32 currentBotId : currentBotIds) { + if (!isSupportedBot.contains(currentBotId)) + continue; + if (!rulesHandler->AddRuleWithErrorCheck(currentBotId, value, type, parser)) + return true; + } + return false; +} + +bool TRobotsTxtRulesHandlerBase::ProcessSitemap(TRobotsTxtRulesHandlerBase* rulesHandler, TRobotsTxtParser& parser, const char* value, const char* host) { + TString modifiedUrl; + if (!CheckSitemapUrl(value, host, modifiedUrl)) + rulesHandler->AddError(ERROR_SITEMAP_FORMAT, parser.GetLineNumber()); + else { + rulesHandler->AddSiteMap(modifiedUrl.data()); + if (!rulesHandler->AddRuleWithErrorCheck(robotstxtcfg::id_anybot, modifiedUrl.data(), 'S', parser)) + return true; + } + return false; +} + +bool TRobotsTxtRulesHandlerBase::ProcessCleanParam(TRobotsTxtRulesHandlerBase* rulesHandler, TRobotsTxtParser& parser, TString& value) { + if (!CheckAndNormCleanParam(value)) + rulesHandler->AddError(ERROR_CLEAN_PARAM_FORMAT, parser.GetLineNumber()); + else { + rulesHandler->AddCleanParam(value.data()); + if (!rulesHandler->AddRuleWithErrorCheck(robotstxtcfg::id_anybot, value.data(), 'P', parser)) + return true; + } + return false; +} + +bool TRobotsTxtRulesHandlerBase::ProcessCrawlDelay( + TRobotsTxtRulesHandlerBase* rulesHandler, + TRobotsTxtParser& parser, + const TBotIdSet& currentBotIds, + const TBotIdSet& isSupportedBot, + const char* value) { + for (ui32 currentBotId : currentBotIds) { + if (!isSupportedBot.contains(currentBotId)) + continue; + if (rulesHandler->BotIdToInfo[currentBotId].CrawlDelay >= 0) { + rulesHandler->AddError(ERROR_CRAWL_DELAY_MULTI, parser.GetLineNumber()); + break; + } + int crawlDelay = -1; + if (!ParseCrawlDelay(value, crawlDelay)) + 
rulesHandler->AddError(ERROR_CRAWL_DELAY_FORMAT, parser.GetLineNumber()); + else { + rulesHandler->SetCrawlDelay(currentBotId, crawlDelay); + if (!rulesHandler->AddRuleWithErrorCheck(currentBotId, value, 'C', parser)) + return true; + } + } + return false; +} +/* For every bot in currentBotIds, compares the length of this User-agent value with the longest one recorded so far for that bot: a bot seen for the first time or matched by a longer name gets ClearAllExceptCrossSection(); a name at least as long as the recorded one puts the bot into isSupportedBot; the recorded length is then raised if needed. */ +void TRobotsTxtRulesHandlerBase::ProcessUserAgent( + TRobotsTxtRulesHandlerBase* rulesHandler, + TRobotsTxtParser& parser, + const TBotIdSet& currentBotIds, + TBotIdSet& wasSupportedBot, + TBotIdSet& isSupportedBot, + TVector<ui32>& botIdToMaxAppropriateUserAgentNameLength, + const char* value) +{ + ui32 userAgentNameLength = (ui32)strlen(value); + + for (ui32 currentBotId : currentBotIds) { + bool userAgentNameLonger = userAgentNameLength > botIdToMaxAppropriateUserAgentNameLength[currentBotId]; + bool userAgentNameSame = userAgentNameLength == botIdToMaxAppropriateUserAgentNameLength[currentBotId]; + + if (!wasSupportedBot.contains(currentBotId) || userAgentNameLonger) + ClearAllExceptCrossSection(parser, rulesHandler, currentBotId); + + wasSupportedBot.insert(currentBotId); + if (userAgentNameLonger || userAgentNameSame) { + isSupportedBot.insert(currentBotId); // Allow multiple blocks for the same user agent + } + botIdToMaxAppropriateUserAgentNameLength[currentBotId] = Max(userAgentNameLength, botIdToMaxAppropriateUserAgentNameLength[currentBotId]); + } +} +/* Clears rulesHandler and fills it from every record the parser yields. Returns 2 when AddRules, ProcessSitemap, ProcessCleanParam or ProcessCrawlDelay return true or OptimizeSize() returns false, returns a non-zero hostHandler->OnHost() result unchanged, and returns 1 otherwise. */ +template <class THostHandler> +int TRobotsTxtRulesHandlerBase::ParseRules(TRobotsTxtParser& parser, TRobotsTxtRulesHandlerBase* rulesHandler, THostHandler* hostHandler, const char* host) { + rulesHandler->Clear(); + + TBotIdSet wasSupportedBot; + TBotIdSet wasRuleForBot; + bool wasAsterisk = false; + TVector<int> nonRobotsLines; + TVector<ui32> botIdToMaxAppropriateUserAgentNameLength(robotstxtcfg::max_botid, 0); + static char all[] = "/"; + EDirectiveType prevType = USER_AGENT; + while (parser.HasRecord()) { + TRobotsTxtRulesRecord record = parser.NextRecord(); + bool wasUserAgent = false; + bool isRobotsRecordUseful = false; + TBotIdSet 
isSupportedBot; + TBotIdSet currentBotIds; + TString field; + TString value; + bool ruleBeforeUserAgent = false; + int ret = 0; + bool wasRule = false; + bool wasBlank = false; + while (record.NextPair(field, value, isRobotsRecordUseful && rulesHandler->IsHandlingErrors(), nonRobotsLines, &wasBlank)) { + CheckRobotsLines(rulesHandler, nonRobotsLines); + EDirectiveType type = NameToDirType(field.data()); + EDirectiveType typeBeforeChange = type; + + if ((prevType != type || wasBlank) && type == USER_AGENT) { /* a User-agent line that follows a different directive or a blank line starts from an empty bot set */ + currentBotIds.clear(); + } + prevType = type; + + switch (type) { + case USER_AGENT: + if (wasUserAgent && wasRule) { /* a User-agent line seen after a rule resets the per-group bot sets */ + wasRule = false; + currentBotIds.clear(); + isSupportedBot.clear(); + } + wasUserAgent = true; + value.to_lower(); + CheckAsterisk(rulesHandler, value.data(), parser.GetLineNumber(), wasAsterisk); + isRobotsRecordUseful = rulesHandler->CheckRobot(value.data(), currentBotIds, &botIdToMaxAppropriateUserAgentNameLength); + if (isRobotsRecordUseful) + ProcessUserAgent(rulesHandler, parser, currentBotIds, wasSupportedBot, isSupportedBot, botIdToMaxAppropriateUserAgentNameLength, value.data()); + break; + + case DISALLOW: + case ALLOW: + if (CheckWasUserAgent(rulesHandler, wasUserAgent, ruleBeforeUserAgent, wasRule, parser.GetLineNumber())) + break; + if (CheckRuleNotSlash(rulesHandler, value.data(), parser.GetLineNumber())) + break; + CheckRule(value.data(), parser.GetLineNumber(), rulesHandler); + if (!CheckSupportedBots(currentBotIds, wasRuleForBot, isSupportedBot)) { + break; + } + if (CheckEmptyRule(rulesHandler, value.data(), type, parser.GetLineNumber())) { + value = all; /* an empty rule value is replaced with "/" */ + if (typeBeforeChange == ALLOW) + continue; /* continues the enclosing while loop: the line is neither added as a rule nor passed to AddAcceptedLine */ + } + + if (AddRules(rulesHandler, parser, value.data(), type == ALLOW ? 
'A' : 'D', currentBotIds, isSupportedBot)) + return 2; + break; + + case HOST: + value.to_lower(); + ret = hostHandler->OnHost(robotstxtcfg::id_anybot, parser, value.data(), rulesHandler); + if (ret) + return ret; + break; + + case SITEMAP: + if (ProcessSitemap(rulesHandler, parser, value.data(), host)) + return 2; + break; + + case CLEAN_PARAM: + if (ProcessCleanParam(rulesHandler, parser, value)) + return 2; + break; + + case CRAWL_DELAY: + if (ProcessCrawlDelay(rulesHandler, parser, currentBotIds, isSupportedBot, value.data())) + return 2; + break; + + default: + rulesHandler->AddError(WARNING_UNKNOWN_FIELD, parser.GetLineNumber()); + break; + } + bool isCrossSection = type == SITEMAP || type == HOST || type == CLEAN_PARAM; /* lines of these types are accepted even when the current record matched no bot */ + if (rulesHandler->IsHandlingErrors() && (isRobotsRecordUseful || isCrossSection)) + rulesHandler->AddAcceptedLine(parser.GetLineNumber(), currentBotIds, isCrossSection); + } + } + + for (auto botId : wasSupportedBot) { + rulesHandler->LoadedBotIds.insert(botId); + if (rulesHandler->IsBotIdSupported(botId)) + rulesHandler->AfterParse(botId); + } + + if (!rulesHandler->OptimizeSize()) { + return 2; + } + + return 1; +} diff --git a/library/cpp/robots_txt/robots_txt_parser.cpp b/library/cpp/robots_txt/robots_txt_parser.cpp new file mode 100644 index 0000000000..8e2fe6073d --- /dev/null +++ b/library/cpp/robots_txt/robots_txt_parser.cpp @@ -0,0 +1,116 @@ +#include "robots_txt_parser.h" +#include <util/generic/string.h> +#include <util/stream/output.h> + +TRobotsTxtParser::TRobotsTxtParser(IInputStream& inputStream) + : InputStream(inputStream) + , LineNumber(0) + , IsLastSymbolCR(false) +{ +} + +int TRobotsTxtParser::GetLineNumber() { + return LineNumber; +} +/* Reads the next input line into Line and returns its data, or nullptr when nothing more can be read. A line ends at CR or LF; a UTF-8 BOM is dropped from the first line and everything from the first '#' on is cut off. */ +const char* TRobotsTxtParser::ReadLine() { + Line = ""; + char c; + + if (IsLastSymbolCR) { + if (!InputStream.ReadChar(c)) + return nullptr; + if (c != '\n') /* the previous line ended with CR: a directly following LF belongs to the same line break and is dropped */ + Line.append(c); + } + + bool hasMoreSymbols; + while (hasMoreSymbols = InputStream.ReadChar(c)) { + if (c == '\r') { + 
IsLastSymbolCR = true; + break; + } else { + IsLastSymbolCR = false; + if (c == '\n') + break; + Line.append(c); + } + } + if (!hasMoreSymbols && Line.empty()) + return nullptr; + + // BOM UTF-8: EF BB BF + if (0 == LineNumber && Line.size() >= 3 && Line[0] == '\xEF' && Line[1] == '\xBB' && Line[2] == '\xBF') + Line = Line.substr(3, Line.size() - 3); + + ++LineNumber; + int i = Line.find('#'); + if (i == 0) + Line = ""; + else if (i > 0) + Line = Line.substr(0, i); + return Line.data(); +} +/* Returns true when s is empty or consists of whitespace characters only. */ +bool TRobotsTxtParser::IsBlankLine(const char* s) { + for (const char* p = s; *p; ++p) + if (!isspace(static_cast<unsigned char>(*p))) /* character classification functions require an unsigned char value; a plain char is negative for bytes >= 0x80 where char is signed */ + return 0; + return 1; +} +/* Trims s in place: skips leading whitespace, writes a terminating zero right after the last non-whitespace character and returns a pointer to the first non-whitespace character inside the same buffer. */ +char* TRobotsTxtParser::Trim(char* s) { + while (isspace(static_cast<unsigned char>(*s))) /* same unsigned char requirement as in IsBlankLine */ + ++s; + char* p = s + strlen(s); /* one past the last character, so an empty string never produces a pointer before s */ + while (s < p && isspace(static_cast<unsigned char>(*(p - 1)))) + --p; + *p = 0; + return s; +} + +inline bool TRobotsTxtParser::IsRobotsLine(const char* s) { + return strchr(s, ':'); +} + +bool TRobotsTxtParser::HasRecord() { + while (!IsRobotsLine(Line.data())) + if (!ReadLine()) + return 0; + return 1; +} + +TRobotsTxtRulesRecord TRobotsTxtParser::NextRecord() { + return TRobotsTxtRulesRecord(*this); +} + +TRobotsTxtRulesRecord::TRobotsTxtRulesRecord(TRobotsTxtParser& parser) + : Parser(parser) +{ +} + +bool TRobotsTxtRulesRecord::NextPair(TString& field, TString& value, bool handleErrors, TVector<int>& nonRobotsLines, bool* wasBlank) { + if (wasBlank) { + *wasBlank = false; + } + while (!Parser.IsRobotsLine(Parser.Line.data())) { + if (!Parser.ReadLine()) + return 0; + if (Parser.IsBlankLine(Parser.Line.data())) { + if (wasBlank) { + *wasBlank = true; + } + continue; + } + if (handleErrors && !Parser.IsRobotsLine(Parser.Line.data())) + nonRobotsLines.push_back(Parser.GetLineNumber()); + } + + char* s = strchr(Parser.Line.begin(), ':'); + *s = 0; /* splits the line at the first ':'; afterwards Line.data() holds no ':' any more, so the next call reads a fresh line */ + char* p = s + 1; + + field = TRobotsTxtParser::Trim(strlwr(Parser.Line.begin())); + value = TRobotsTxtParser::Trim(p); + return 1; +} diff --git a/library/cpp/robots_txt/robots_txt_parser.h
b/library/cpp/robots_txt/robots_txt_parser.h new file mode 100644 index 0000000000..8032d0d20b --- /dev/null +++ b/library/cpp/robots_txt/robots_txt_parser.h @@ -0,0 +1,38 @@ +#pragma once + +#include <algorithm> +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/stream/input.h> + +class TRobotsTxtParser; + +class TRobotsTxtRulesRecord { +private: + TRobotsTxtParser& Parser; + +public: + TRobotsTxtRulesRecord(TRobotsTxtParser& parser); + bool NextPair(TString& field, TString& value, bool handleErrors, TVector<int>& nonRobotsLines, bool* wasBlank = nullptr); +}; + +class TRobotsTxtParser { + friend class TRobotsTxtRulesRecord; + +private: + IInputStream& InputStream; + TString Line; + int LineNumber; + bool IsLastSymbolCR; + + const char* ReadLine(); + static bool IsBlankLine(const char*); + static bool IsRobotsLine(const char*); + +public: + static char* Trim(char*); + TRobotsTxtParser(IInputStream& inputStream); + bool HasRecord(); + TRobotsTxtRulesRecord NextRecord(); + int GetLineNumber(); +}; diff --git a/library/cpp/robots_txt/robotstxtcfg.h b/library/cpp/robots_txt/robotstxtcfg.h new file mode 100644 index 0000000000..5ca1682a0c --- /dev/null +++ b/library/cpp/robots_txt/robotstxtcfg.h @@ -0,0 +1,3 @@ +#pragma once + +#include <library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h> diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.darwin-x86_64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..09cfd4b3f1 --- /dev/null +++ b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. 
Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-robots_txt-robotstxtcfg) +target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-case_insensitive_string +) +target_sources(cpp-robots_txt-robotstxtcfg PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp +) diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-aarch64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..6fe7e7a7ad --- /dev/null +++ b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-aarch64.txt @@ -0,0 +1,21 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-robots_txt-robotstxtcfg) +target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-case_insensitive_string +) +target_sources(cpp-robots_txt-robotstxtcfg PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp +) diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-x86_64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..6fe7e7a7ad --- /dev/null +++ b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.linux-x86_64.txt @@ -0,0 +1,21 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-robots_txt-robotstxtcfg) +target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-case_insensitive_string +) +target_sources(cpp-robots_txt-robotstxtcfg PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp +) diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/robots_txt/robotstxtcfg/CMakeLists.windows-x86_64.txt b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..09cfd4b3f1 --- /dev/null +++ b/library/cpp/robots_txt/robotstxtcfg/CMakeLists.windows-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(cpp-robots_txt-robotstxtcfg) +target_link_libraries(cpp-robots_txt-robotstxtcfg PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-case_insensitive_string +) +target_sources(cpp-robots_txt-robotstxtcfg PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp +) diff --git a/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp b/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp new file mode 100644 index 0000000000..aec668582c --- /dev/null +++ b/library/cpp/robots_txt/robotstxtcfg/bot_id_set.cpp @@ -0,0 +1,2 @@ +#include "bot_id_set.h" +// header compile test diff --git a/library/cpp/robots_txt/robotstxtcfg/bot_id_set.h b/library/cpp/robots_txt/robotstxtcfg/bot_id_set.h new file mode 100644 index 0000000000..08aaa68a50 --- /dev/null +++ b/library/cpp/robots_txt/robotstxtcfg/bot_id_set.h @@ -0,0 +1,132 @@ +#pragma once + +#include "user_agents.h" + +#include <bitset> + + +/// Simple bitset-based set for bot ids, meant to optimize memory and lookups +class TBotIdSet +{ +public: + using TData = std::bitset<robotstxtcfg::max_botid>; + + constexpr TBotIdSet() noexcept = default; + constexpr TBotIdSet(const TBotIdSet&) noexcept = default; + constexpr TBotIdSet(TBotIdSet&&) noexcept = default; + constexpr TBotIdSet& operator = (const TBotIdSet&) noexcept = default; + constexpr TBotIdSet& operator = (TBotIdSet&&) noexcept = default; + + TBotIdSet(std::initializer_list<ui32> botIds) { + for (auto id : botIds) { + insert(id); + } + } + + static TBotIdSet All() noexcept { + TBotIdSet res; + res.Bots.set(); + return res; + } + + constexpr bool contains(ui32 botId) const noexcept { + return (botId < Bots.size()) && Bots[botId]; + } + + bool insert(ui32 botId) noexcept { /* returns false when botId is out of range or already present */ + if (botId >= Bots.size() || Bots[botId]) { + return false; + } + Bots[botId] = true; + return true; + } + + bool 
remove(ui32 botId) noexcept { /* returns false when botId is out of range or not present */ + if (botId >= Bots.size() || !Bots[botId]) { + return false; + } + Bots[botId] = false; + return true; + } + + void clear() noexcept { + Bots.reset(); + } + + size_t size() const noexcept { + return Bots.count(); + } + + bool empty() const noexcept { + return Bots.none(); + } + + bool operator==(const TBotIdSet& rhs) const noexcept = default; + + TBotIdSet operator&(TBotIdSet rhs) const noexcept { + rhs.Bots &= Bots; + return rhs; + } + + TBotIdSet operator|(TBotIdSet rhs) const noexcept { + rhs.Bots |= Bots; + return rhs; + } + + TBotIdSet operator~() const noexcept { + TBotIdSet result; + result.Bots = ~Bots; + return result; + } + + class iterator /* walks the ids whose bit is set, in increasing order; BotId == Bots.size() is the end position */ + { + public: + auto operator * () const noexcept { + return BotId; + } + + iterator& operator ++ () noexcept { + while (BotId < Bots.size()) { + if (++BotId >= Bots.size() || Bots[BotId]) { /* test the bound before indexing: std::bitset::operator[] is unchecked, so stepping off the last id must stop at size() without reading Bots[size()] */ + break; + } + } + return *this; + } + + bool operator == (const iterator& rhs) const noexcept { + return (&Bots == &rhs.Bots) && (BotId == rhs.BotId); + } + + bool operator != (const iterator& rhs) const noexcept { + return !(*this == rhs); + } + + private: + friend class TBotIdSet; + iterator(const TData& bots, ui32 botId) + : Bots(bots) + , BotId(botId) + { + while (BotId < Bots.size() && !Bots[BotId]) { /* start on the first set bit at or after botId */ + ++BotId; + } + } + + private: + const TData& Bots; + ui32 BotId; + }; + + iterator begin() const noexcept { + return {Bots, robotstxtcfg::id_anybot}; + } + + iterator end() const noexcept { + return {Bots, robotstxtcfg::max_botid}; + } + +private: + TData Bots {}; +}; diff --git a/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp b/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp new file mode 100644 index 0000000000..c5652b81c5 --- /dev/null +++ b/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.cpp @@ -0,0 +1,2 @@ +#include "robotstxtcfg.h" +// header compile test diff --git a/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h b/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h new file mode 100644 index 
0000000000..2cf9430d7c --- /dev/null +++ b/library/cpp/robots_txt/robotstxtcfg/robotstxtcfg.h @@ -0,0 +1,11 @@ +#pragma once + +#include "bot_id_set.h" + + +namespace robotstxtcfg { + +static const TBotIdSet defaultSupportedBotIds = {id_defbot}; +static const TBotIdSet allSupportedBotIds = TBotIdSet::All(); + +} // namespace robotstxtcfg diff --git a/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp b/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp new file mode 100644 index 0000000000..60b353a427 --- /dev/null +++ b/library/cpp/robots_txt/robotstxtcfg/user_agents.cpp @@ -0,0 +1,2 @@ +#include "user_agents.h" +// header compile test diff --git a/library/cpp/robots_txt/robotstxtcfg/user_agents.h b/library/cpp/robots_txt/robotstxtcfg/user_agents.h new file mode 100644 index 0000000000..59245d07cb --- /dev/null +++ b/library/cpp/robots_txt/robotstxtcfg/user_agents.h @@ -0,0 +1,303 @@ +#pragma once + +#include <library/cpp/case_insensitive_string/case_insensitive_string.h> + + +namespace robotstxtcfg { + // robots.txt agents and identifiers + + enum EBots : ui32 { + id_anybot = 0, + id_yandexbot = 1, + id_yandexmediabot = 2, + id_yandeximagesbot = 3, + id_googlebot = 4, + id_yandexbotmirr = 5, + id_yahooslurp = 6, + id_msnbot = 7, + id_yandexcatalogbot = 8, + id_yandexdirectbot = 9, + id_yandexblogsbot = 10, + id_yandexnewsbot = 11, + id_yandexpagechk = 12, + id_yandexmetrikabot = 13, + id_yandexbrowser = 14, + id_yandexmarketbot = 15, + id_yandexcalendarbot = 16, + id_yandexwebmasterbot = 17, + id_yandexvideobot = 18, + id_yandeximageresizerbot = 19, + id_yandexadnetbot = 20, + id_yandexpartnerbot = 21, + id_yandexdirectdbot = 22, + id_yandextravelbot = 23, + id_yandexmobilebot = 24, + id_yandexrcabot = 25, + id_yandexdirectdynbot = 26, + id_yandexmobilebot_ed = 27, + id_yandexaccessibilitybot = 28, + id_baidubot = 29, + id_yandexscreenshotbot = 30, + id_yandexmetrikayabs = 31, + id_yandexvideoparserbot = 32, + id_yandexnewsbot4 = 33, + id_yandexmarketbot2 = 34, 
+ id_yandexmedianabot = 35, + id_yandexsearchshopbot = 36, + id_yandexontodbbot = 37, + id_yandexontodbapibot = 38, + id_yandexampbot = 39, + id_yandexvideohosting = 40, + id_yandexmediaselling = 41, + id_yandexverticals = 42, + id_yandexturbobot = 43, + id_yandexzenbot = 44, + id_yandextrackerbot = 45, + id_yandexmetrikabot4 = 46, + id_yandexmobilescreenshotbot = 47, + id_yandexfaviconsbot = 48, + id_yandexrenderresourcesbot = 49, + id_yandexactivity = 50, + max_botid + }; + + static const ui32 id_defbot = id_yandexbot; + + struct TBotInfo { + TCaseInsensitiveStringBuf ReqPrefix; + TCaseInsensitiveStringBuf FullName; + TStringBuf FromField = {}; + TStringBuf UserAgent = {}; + TStringBuf RotorUserAgent = {}; + bool ExplicitDisallow = false; + }; + + static constexpr TStringBuf UserAgentFrom("support@search.yandex.ru"); + + static constexpr TBotInfo BotInfoArr[] = { + {"*", "*"}, + {"Yandex", "YandexBot/3.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + false}, + {"Yandex", "YandexMedia/3.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexMedia/3.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexMedia/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + false}, + {"Yandex", "YandexImages/3.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + false}, + {"Google", "GoogleBot"}, + {"Yandex", "YandexBot/3.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexBot/3.0; MirrorDetector; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexBot/3.0; MirrorDetector; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + false}, + {"Slurp", "Slurp"}, + 
{"msn", "msnbot"}, + {"Yandex", "YandexCatalog/3.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexCatalog/3.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexCatalog/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + false}, + {"YaDirectFetcher", "YaDirectFetcher/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + + {"Yandex", "YandexBlogs/0.99", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + false}, + {"Yandex", "YandexNews/3.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexNews/3.0; robot; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexNews/3.0; robot; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + false}, + {"Yandex", "YandexPagechecker/2.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexPagechecker/2.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexPagechecker/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + false}, + {"Yandex", "YandexMetrika/3.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexMetrika/3.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexMetrika/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + false}, + {"Yandex", "YandexBrowser/1.0", UserAgentFrom, + "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/536.5 (KHTML, like Gecko) YaBrowser/1.0.1084.5402 Chrome/19.0.1084.5409 Safari/536.5", + "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/536.5 (KHTML, like Gecko) YaBrowser/1.0.1084.5402 Chrome/19.0.1084.5409 Safari/536.5", + false}, + {"Yandex", 
"YandexMarket/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexMarket/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexMarket/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + false}, + {"YandexCalendar", "YandexCalendar/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexCalendar/1.0 +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexCalendar/1.0 +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"Yandex", "YandexWebmaster/2.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + false}, + {"Yandex", "YandexVideo/3.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexVideo/3.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexVideo/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + false}, + {"Yandex", "YandexImageResizer/2.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + false}, + + {"YandexDirect", "YandexDirect/3.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YandexPartner", "YandexPartner/3.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexPartner/3.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexPartner/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YaDirectFetcher", "YaDirectFetcher/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; Dyatel; 
+http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YaDirectFetcher/1.0; Dyatel; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"Yandex", "YandexTravel/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexTravel/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexTravel/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + false}, + {"Yandex", "YandexBot/3.0", UserAgentFrom, + "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)", + "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)", + false}, + {"YandexRCA", "YandexRCA/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexRCA/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexRCA/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YandexDirectDyn", "YandexDirectDyn/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexDirectDyn/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexDirectDyn/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YandexMobileBot", "YandexMobileBot/3.0", UserAgentFrom, + "Mozilla/5.0 (iPhone; CPU iPhone OS 15_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Mobile/15E148 Safari/604.1 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)", + "Mozilla/5.0 (iPhone; CPU iPhone OS 15_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Mobile/15E148 Safari/604.1 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)", + true}, + {"YandexAccessibilityBot", "YandexAccessibilityBot/3.0", UserAgentFrom, + "Mozilla/5.0 (compatible; 
YandexAccessibilityBot/3.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexAccessibilityBot/3.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"Baidu", "Baiduspider"}, + + {"YandexScreenshotBot", "YandexScreenshotBot/3.0", UserAgentFrom, + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexScreenshotBot/3.0; +http://yandex.com/bots)", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexScreenshotBot/3.0; +http://yandex.com/bots)", + true}, + {"YandexMetrika", "YandexMetrika/2.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots yabs01)", + "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots yabs01) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YandexVideoParser", "YandexVideoParser/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexVideoParser/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexVideoParser/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"Yandex", "YandexNews/4.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexNews/4.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexNews/4.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YandexMarket", "YandexMarket/2.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexMarket/2.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexMarket/2.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YandexMedianaBot", "YandexMedianaBot/1.0", UserAgentFrom, + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexMedianaBot/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (X11; Linux 
x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 (compatible; YandexMedianaBot/1.0; +http://yandex.com/bots)", + true}, + {"YandexSearchShop", "YandexSearchShop/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexSearchShop/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexSearchShop/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"Yandex", "YandexOntoDB/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexOntoDB/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexOntoDB/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + false}, + {"YandexOntoDBAPI", "YandexOntoDBAPI/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexOntoDBAPI/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexOntoDBAPI/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"Yandex-AMPHTML", "Yandex-AMPHTML", UserAgentFrom, + "Mozilla/5.0 (compatible; Yandex-AMPHTML; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; Yandex-AMPHTML; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + + {"YandexVideoHosting", "YandexVideoHosting/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexVideoHosting/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexVideoHosting/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YandexMediaSelling", "YandexMediaSelling/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexMediaSelling/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexMediaSelling/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YandexVerticals", "YandexVerticals/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexVerticals/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexVerticals/1.0; 
+http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YandexTurbo", "YandexTurbo/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexTurbo/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexTurbo/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YandexZenRss", "YandexZenRss/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexZenRss/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexZenRss/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YandexTracker", "YandexTracker/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexTracker/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexTracker/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YandexMetrika", "YandexMetrika/4.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexMetrika/4.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexMetrika/4.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YandexMobileScreenShotBot", "YandexMobileScreenShotBot/1.0", UserAgentFrom, + "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/11.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)", + "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/11.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)", + true}, + {"YandexFavicons", "YandexFavicons/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexFavicons/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexFavicons/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YandexRenderResourcesBot", "YandexRenderResourcesBot/1.0", UserAgentFrom, + 
"Mozilla/5.0 (compatible; YandexRenderResourcesBot/1.0; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexRenderResourcesBot/1.0; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true}, + {"YandexActivity", "YandexActivity/1.0", UserAgentFrom, + "Mozilla/5.0 (compatible; YandexActivity; robot; +http://yandex.com/bots)", + "Mozilla/5.0 (compatible; YandexActivity; robot; +http://yandex.com/bots) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0", + true} + }; + + static_assert(std::size(BotInfoArr) == max_botid); + + constexpr auto GetReqPrefix(ui32 botId) { + return BotInfoArr[botId].ReqPrefix; + } + + constexpr auto GetFullName(ui32 botId) { + return BotInfoArr[botId].FullName; + } + + constexpr auto GetFromField(ui32 botId) { + return BotInfoArr[botId].FromField; + } + + constexpr auto GetUserAgent(ui32 botId) { + return BotInfoArr[botId].UserAgent; + } + + constexpr auto GetRotorUserAgent(ui32 botId) { + return BotInfoArr[botId].RotorUserAgent; + } + + constexpr bool IsExplicitDisallow(ui32 botId) { + return BotInfoArr[botId].ExplicitDisallow; + } + + constexpr bool IsYandexBotId(ui32 botId) { + return !BotInfoArr[botId].UserAgent.empty(); + } + +} // namespace robotstxtcfg diff --git a/library/cpp/robots_txt/robotstxtcfg/ya.make b/library/cpp/robots_txt/robotstxtcfg/ya.make new file mode 100644 index 0000000000..61c731be42 --- /dev/null +++ b/library/cpp/robots_txt/robotstxtcfg/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +SRCS( + bot_id_set.cpp + robotstxtcfg.cpp + user_agents.cpp +) + +PEERDIR( + library/cpp/case_insensitive_string +) + +END() diff --git a/library/cpp/robots_txt/rules_handler.cpp b/library/cpp/robots_txt/rules_handler.cpp new file mode 100644 index 0000000000..4297db9d21 --- /dev/null +++ b/library/cpp/robots_txt/rules_handler.cpp @@ -0,0 +1,514 @@ +#include "robots_txt.h" +#include "constants.h" + +#include <library/cpp/uri/http_url.h> +#include <library/cpp/charset/ci_string.h> +#include 
<library/cpp/string_utils/url/url.h> +#include <util/system/maxlen.h> +#include <util/generic/yexception.h> +#include <util/generic/algorithm.h> + + +namespace { + +TBotIdSet ConvertBotIdSet(const TSet<ui32>& botIds) noexcept { + TBotIdSet result; + for (auto id : botIds) { + result.insert(id); + } + return result; +} + +} // namespace + +TRobotsTxtRulesIterator::TRobotsTxtRulesIterator(const char* begin, const char* end) + : Begin(begin) + , End(end) +{ +} + +void TRobotsTxtRulesIterator::Next() { + while (Begin < End && *Begin) + ++Begin; + while (Begin < End && !isalpha(*Begin)) + ++Begin; +} + +bool TRobotsTxtRulesIterator::HasRule() const { + return Begin < End; +} + +const char* TRobotsTxtRulesIterator::GetRule() const { + return Begin + 1; +} + +TString TRobotsTxtRulesIterator::GetInitialRule() const { + auto begin = Begin + 1; + TStringBuf rule(begin, strlen(begin)); + + switch (*Begin) { + case 'a': + case 'd': + return rule.EndsWith('*') ? TString(rule.Chop(1)) : TString::Join(rule, '$'); + default: + return TString(rule); + } +} + +EDirectiveType TRobotsTxtRulesIterator::GetRuleType() const { + return CharToDirType(*Begin); +} + +EDirectiveType TRobotsTxtRulesIterator::CharToDirType(char ch) { + switch (toupper(ch)) { + case 'A': + return ALLOW; + case 'C': + return CRAWL_DELAY; + case 'D': + return DISALLOW; + case 'H': + return HOST; + case 'P': + return CLEAN_PARAM; + case 'S': + return SITEMAP; + } + return UNKNOWN; +} + +TRobotsTxtRulesHandlerBase::TRobotsTxtRulesHandlerBase( + TBotIdSet supportedBotIds, + int robotsMaxSize, + int maxRulesNumber, + bool saveDataForAnyBot) + : HandleErrors(false) + , SiteMaps() + , CleanParams() + , HostDirective("") + , Errors() + , AcceptedLines() + , CrossSectionAcceptedLines() + , BotIdToInfo(robotstxtcfg::max_botid) + , RobotsMaxSize(robotsMaxSize) + , MaxRulesNumber(maxRulesNumber) + , SaveDataForAnyBot(saveDataForAnyBot) + , SupportedBotIds(supportedBotIds) +{ + Y_ENSURE(!supportedBotIds.empty()); + + if 
(RobotsMaxSize <= 0) + RobotsMaxSize = robots_max; + if (MaxRulesNumber <= 0) + MaxRulesNumber = max_rules_count; + + ResetOptimized(); +} + +TRobotsTxtRulesHandlerBase::TRobotsTxtRulesHandlerBase( + const TSet<ui32>& supportedBotIds, + int robotsMaxSize, + int maxRulesNumber, + bool saveDataForAnyBot) + : TRobotsTxtRulesHandlerBase(ConvertBotIdSet(supportedBotIds), robotsMaxSize, maxRulesNumber, saveDataForAnyBot) +{} + +TRobotsTxtRulesHandlerBase::~TRobotsTxtRulesHandlerBase() = default; + +void TRobotsTxtRulesHandlerBase::CheckBotIdValidity(const ui32 botId) const { + if (botId >= robotstxtcfg::max_botid || !IsBotIdSupported(botId)) + ythrow yexception() << "robots.txt parser requested for invalid or unsupported botId = " << botId << Endl; + ; +} + +int TRobotsTxtRulesHandlerBase::GetCrawlDelay(const ui32 botId, bool* realInfo) const { + const auto id = GetMappedBotId(botId, false); + if (realInfo) + *realInfo = bool(id); + return BotIdToInfo[id.GetOrElse(robotstxtcfg::id_anybot)].CrawlDelay; +} + +int TRobotsTxtRulesHandlerBase::GetMinCrawlDelay(int defaultCrawlDelay) const { + int res = INT_MAX; + bool useDefault = false; + for (ui32 botId = 0; botId < robotstxtcfg::max_botid; ++botId) { + if (robotstxtcfg::IsYandexBotId(botId) && IsBotIdSupported(botId) && !IsDisallowAll(botId)) { + bool realInfo; + int curCrawlDelay = GetCrawlDelay(botId, &realInfo); + if (realInfo) { + if (curCrawlDelay == -1) { + useDefault = true; + } else { + res = Min(res, curCrawlDelay); + } + } + } + } + + if (useDefault && defaultCrawlDelay < res) { + return -1; + } + + if (res == INT_MAX) { + res = GetCrawlDelay(robotstxtcfg::id_anybot); + } + + return res; +} + +void TRobotsTxtRulesHandlerBase::SetCrawlDelay(const ui32 botId, int crawlDelay) { + CheckBotIdValidity(botId); + BotIdToInfo[botId].CrawlDelay = crawlDelay; +} + +const TVector<TString> TRobotsTxtRulesHandlerBase::GetSiteMaps() const { + return TVector<TString>(SiteMaps.begin(), SiteMaps.end()); +} + +void 
TRobotsTxtRulesHandlerBase::AddSiteMap(const char* sitemap) { + SiteMaps.insert(sitemap); +} + +const TVector<TString> TRobotsTxtRulesHandlerBase::GetCleanParams() const { + return TVector<TString>(CleanParams.begin(), CleanParams.end()); +} + +void TRobotsTxtRulesHandlerBase::AddCleanParam(const char* cleanParam) { + CleanParams.insert(cleanParam); +} + +const TString& TRobotsTxtRulesHandlerBase::GetHostDirective() const { + return HostDirective; +} + +void TRobotsTxtRulesHandlerBase::SetHostDirective(const char* hostDirective) { + HostDirective = hostDirective; +} + +const TRobotsTxtRulesHandlerBase::TErrorVector& TRobotsTxtRulesHandlerBase::GetErrors() const { + return Errors; +} + +TVector<int> TRobotsTxtRulesHandlerBase::GetAcceptedLines(const ui32 botId) const { + TVector<int> ret; + for (size_t i = 0; i < CrossSectionAcceptedLines.size(); ++i) + ret.push_back(CrossSectionAcceptedLines[i]); + + bool hasLinesForBotId = false; + for (size_t i = 0; i < AcceptedLines.size(); ++i) { + if (AcceptedLines[i].first == botId) { + hasLinesForBotId = true; + break; + } + } + + for (size_t i = 0; i < AcceptedLines.size(); ++i) { + if (hasLinesForBotId && AcceptedLines[i].first == botId) { + ret.push_back(AcceptedLines[i].second); + } else if (!hasLinesForBotId && AcceptedLines[i].first == robotstxtcfg::id_anybot) { + ret.push_back(AcceptedLines[i].second); + } + } + + Sort(ret.begin(), ret.end()); + + return ret; +} + +void TRobotsTxtRulesHandlerBase::AddAcceptedLine(ui32 line, const TBotIdSet& botIds, bool isCrossSection) { + if (isCrossSection) { + CrossSectionAcceptedLines.push_back(line); + return; + } + + for (auto botId : botIds) { + AcceptedLines.push_back(TBotIdAcceptedLine(botId, line)); + } +} + +void TRobotsTxtRulesHandlerBase::SetErrorsHandling(bool handleErrors) { + HandleErrors = handleErrors; +} + +bool TRobotsTxtRulesHandlerBase::IsHandlingErrors() const { + return HandleErrors; +} + +EDirectiveType TRobotsTxtRulesHandlerBase::NameToDirType(const char* d) 
{ + if (!strcmp("disallow", d)) + return DISALLOW; + if (!strcmp("allow", d)) + return ALLOW; + if (!strcmp("user-agent", d)) + return USER_AGENT; + if (!strcmp("host", d)) + return HOST; + if (!strcmp("sitemap", d)) + return SITEMAP; + if (!strcmp("clean-param", d)) + return CLEAN_PARAM; + if (!strcmp("crawl-delay", d)) + return CRAWL_DELAY; + return UNKNOWN; +} + +const char* TRobotsTxtRulesHandlerBase::DirTypeToName(EDirectiveType t) { + static const char* name[] = {"Allow", "Crawl-Delay", "Disallow", "Host", "Clean-Param", "Sitemap", "User-Agent", "Unknown"}; + switch (t) { + case ALLOW: + return name[0]; + case CRAWL_DELAY: + return name[1]; + case DISALLOW: + return name[2]; + case HOST: + return name[3]; + case CLEAN_PARAM: + return name[4]; + case SITEMAP: + return name[5]; + case USER_AGENT: + return name[6]; + case UNKNOWN: + return name[7]; + } + return name[7]; +} + +bool TRobotsTxtRulesHandlerBase::CheckRobot( + const char* userAgent, + TBotIdSet& botIds, + const TVector<ui32>* botIdToMaxAppropriateUserAgentNameLength) const +{ + TCaseInsensitiveStringBuf agent(userAgent); + + for (size_t botIndex = 0; botIndex < robotstxtcfg::max_botid; ++botIndex) { + if (!IsBotIdSupported(botIndex)) + continue; + + bool hasRequiredAgentNamePrefix = agent.StartsWith(robotstxtcfg::GetReqPrefix(botIndex)); + bool isContainedInFullName = robotstxtcfg::GetFullName(botIndex).StartsWith(agent); + bool wasMoreImportantAgent = false; + if (botIdToMaxAppropriateUserAgentNameLength) + wasMoreImportantAgent = agent.size() < (*botIdToMaxAppropriateUserAgentNameLength)[botIndex]; + + if (hasRequiredAgentNamePrefix && isContainedInFullName && !wasMoreImportantAgent) { + botIds.insert(botIndex); + } + } + + return !botIds.empty(); +} + +int TRobotsTxtRulesHandlerBase::CheckRule(const char* value, int line, TRobotsTxtRulesHandlerBase* rulesHandler) { + if (!rulesHandler->IsHandlingErrors()) + return 0; + + if (auto len = strlen(value); len > max_rule_length) { + 
rulesHandler->AddError(ERROR_RULE_HUGE, line); + } + + bool upper = false, suspect = false; + for (const char* r = value; *r; ++r) { + if (!upper && isupper(*r)) + upper = true; + if (!suspect && !isalnum(*r) && !strchr("/_?=.-*%&~[]:;@", *r) && (*(r + 1) || *r != '$')) + suspect = true; + } + if (suspect) + rulesHandler->AddError(WARNING_SUSPECT_SYMBOL, line); + if (upper) + rulesHandler->AddError(WARNING_UPPER_REGISTER, line); + return suspect || upper; +} + +void TRobotsTxtRulesHandlerBase::AddError(EFormatErrorType type, int line) { + if (!HandleErrors) + return; + Errors.push_back(std::make_pair(type, line)); +} + +void TRobotsTxtRulesHandlerBase::ResetOptimized() noexcept { + for (ui32 i = 0; i < OptimizedBotIdToStoredBotId.size(); ++i) { + OptimizedBotIdToStoredBotId[i] = i; // by default, every bot maps to itself + } +} + +void TRobotsTxtRulesHandlerBase::Clear() { + SiteMaps.clear(); + CleanParams.clear(); + HostDirective = ""; + if (HandleErrors) { + AcceptedLines.clear(); + CrossSectionAcceptedLines.clear(); + Errors.clear(); + } + + for (size_t botId = 0; botId < BotIdToInfo.size(); ++botId) { + BotIdToInfo[botId].CrawlDelay = -1; + } + + LoadedBotIds.clear(); +} + +void TRobotsTxtRulesHandlerBase::ClearInternal(const ui32 botId) { + CheckBotIdValidity(botId); + BotIdToInfo[botId].CrawlDelay = -1; + + TVector<TBotIdAcceptedLine> newAcceptedLines; + for (size_t i = 0; i < AcceptedLines.size(); ++i) + if (AcceptedLines[i].first != botId) + newAcceptedLines.push_back(AcceptedLines[i]); + + AcceptedLines.swap(newAcceptedLines); +} + +int TRobotsTxtRulesHandlerBase::CheckHost(const char* host) { + THttpURL parsed; + TString copyHost = host; + + if (GetHttpPrefixSize(copyHost) == 0) { + copyHost = TString("http://") + copyHost; + } + + return parsed.Parse(copyHost.data(), THttpURL::FeaturesRobot) == THttpURL::ParsedOK && parsed.GetField(THttpURL::FieldHost) != TString(""); +} + +int TRobotsTxtRulesHandlerBase::CheckSitemapUrl(const char* url, const char* 
host, TString& modifiedUrl) { + if (host != nullptr && strlen(url) > 0 && url[0] == '/') { + modifiedUrl = TString(host) + url; + } else { + modifiedUrl = url; + } + + url = modifiedUrl.data(); + + if (strlen(url) >= URL_MAX - 8) + return 0; + THttpURL parsed; + if (parsed.Parse(url, THttpURL::FeaturesRobot) || !parsed.IsValidAbs()) + return 0; + if (parsed.GetScheme() != THttpURL::SchemeHTTP && parsed.GetScheme() != THttpURL::SchemeHTTPS) + return 0; + return CheckHost(parsed.PrintS(THttpURL::FlagHostPort).data()); +} + +// s - is space separated pair of clean-params (separated by &) and path prefix +int TRobotsTxtRulesHandlerBase::CheckAndNormCleanParam(TString& value) { + if (value.find(' ') == TString::npos) { + value.push_back(' '); + } + + const char* s = value.data(); + if (!s || !*s || strlen(s) > URL_MAX / 2 - 9) + return 0; + const char* p = s; + while (*p && !isspace(*p)) + ++p; + for (; s != p; ++s) { + // allowed only following not alpha-numerical symbols + if (!isalnum(*s) && !strchr("+-=_&%[]{}():.", *s)) + return 0; + // clean-params for prefix can be enumerated by & symbol, && not allowed syntax + if (*s == '&' && *(s + 1) == '&') + return 0; + } + const char* pathPrefix = p + 1; + while (isspace(*p)) + ++p; + char r[URL_MAX]; + char* pr = r; + for (; *p; ++p) { + if (!isalnum(*p) && !strchr(".-/*_,;:%", *p)) + return 0; + if (*p == '*') + *pr++ = '.'; + if (*p == '.') + *pr++ = '\\'; + *pr++ = *p; + } + *pr++ = '.'; + *pr++ = '*'; + *pr = 0; + TString params = value.substr(0, pathPrefix - value.data()); + value = params + r; + return 1; +} + +int TRobotsTxtRulesHandlerBase::ParseCrawlDelay(const char* value, int& crawlDelay) { + static const int MAX_CRAWL_DELAY = 1 << 10; + int val = 0; + const char* p = value; + for (; isdigit(*p); ++p) { + val = val * 10 + *p - '0'; + if (val > MAX_CRAWL_DELAY) + return 0; + } + if (*p) { + if (*p++ != '.') + return 0; + if (strspn(p, "1234567890") != strlen(p)) + return 0; + } + for (const char* s = p; s - p < 
3; ++s) + val = val * 10 + (s < p + strlen(p) ? *s - '0' : 0); + crawlDelay = val; + return 1; +} + +bool TRobotsTxtRulesHandlerBase::AddRuleWithErrorCheck(const ui32 botId, TStringBuf rule, char type, TRobotsTxtParser& parser) { + if (!IsBotIdSupported(botId)) + return true; + + if (!AddRule(botId, rule, type)) { + AddError(ERROR_ROBOTS_HUGE, parser.GetLineNumber()); + AfterParse(botId); + return false; + } + return true; +} + +int TRobotsTxtRulesHandlerBase::OnHost(const ui32 botId, TRobotsTxtParser& parser, const char* value, TRobotsTxtRulesHandlerBase*& rulesHandler) { + // Temporary hack for correct repacking robots.txt from new format to old + // Remove it, when robot-stable-2010-10-17 will be deployed in production + if (!IsBotIdSupported(botId)) + return 0; + // end of hack + + if (rulesHandler->HostDirective != "") + rulesHandler->AddError(ERROR_HOST_MULTI, parser.GetLineNumber()); + else { + if (!CheckHost(value)) + rulesHandler->AddError(ERROR_HOST_FORMAT, parser.GetLineNumber()); + else { + rulesHandler->SetHostDirective(value); + if (!rulesHandler->AddRuleWithErrorCheck(botId, value, 'H', parser)) + return 2; + } + } + return 0; +} + +bool TRobotsTxtRulesHandlerBase::IsBotIdLoaded(const ui32 botId) const { + return LoadedBotIds.contains(botId); +} + +bool TRobotsTxtRulesHandlerBase::IsBotIdSupported(const ui32 botId) const { + return (SaveDataForAnyBot && botId == robotstxtcfg::id_anybot) || SupportedBotIds.contains(botId); +} + +ui32 TRobotsTxtRulesHandlerBase::GetNotOptimizedBotId(const ui32 botId) const { + return (botId < OptimizedBotIdToStoredBotId.size()) + ? 
OptimizedBotIdToStoredBotId[botId] + : botId; +} + +TMaybe<ui32> TRobotsTxtRulesHandlerBase::GetMappedBotId(ui32 botId, bool useAny) const { + botId = GetNotOptimizedBotId(botId); + CheckBotIdValidity(botId); + if (IsBotIdLoaded(botId)) + return botId; + if (useAny) + return robotstxtcfg::id_anybot; + return {}; +} diff --git a/library/cpp/robots_txt/ya.make b/library/cpp/robots_txt/ya.make new file mode 100644 index 0000000000..c12b57ea04 --- /dev/null +++ b/library/cpp/robots_txt/ya.make @@ -0,0 +1,18 @@ +LIBRARY() + +SRCS( + prefix_tree.cpp + prefix_tree_rules_handler.cpp + robots_txt_parser.cpp + rules_handler.cpp +) + +PEERDIR( + library/cpp/robots_txt/robotstxtcfg + library/cpp/case_insensitive_string + library/cpp/charset + library/cpp/string_utils/url + library/cpp/uri +) + +END() diff --git a/library/cpp/yconf/CMakeLists.darwin-x86_64.txt b/library/cpp/yconf/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..4e5bbf836d --- /dev/null +++ b/library/cpp/yconf/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(library-cpp-yconf) +target_link_libraries(library-cpp-yconf PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-charset + library-cpp-logger +) +target_sources(library-cpp-yconf PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/yconf/conf.cpp +) diff --git a/library/cpp/yconf/CMakeLists.linux-aarch64.txt b/library/cpp/yconf/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..8ddf881133 --- /dev/null +++ b/library/cpp/yconf/CMakeLists.linux-aarch64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(library-cpp-yconf) +target_link_libraries(library-cpp-yconf PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-charset + library-cpp-logger +) +target_sources(library-cpp-yconf PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/yconf/conf.cpp +) diff --git a/library/cpp/yconf/CMakeLists.linux-x86_64.txt b/library/cpp/yconf/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..8ddf881133 --- /dev/null +++ b/library/cpp/yconf/CMakeLists.linux-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(library-cpp-yconf) +target_link_libraries(library-cpp-yconf PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-charset + library-cpp-logger +) +target_sources(library-cpp-yconf PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/yconf/conf.cpp +) diff --git a/library/cpp/yconf/CMakeLists.txt b/library/cpp/yconf/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/library/cpp/yconf/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/library/cpp/yconf/CMakeLists.windows-x86_64.txt b/library/cpp/yconf/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..4e5bbf836d --- /dev/null +++ b/library/cpp/yconf/CMakeLists.windows-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(library-cpp-yconf) +target_link_libraries(library-cpp-yconf PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-charset + library-cpp-logger +) +target_sources(library-cpp-yconf PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/yconf/conf.cpp +) diff --git a/mapreduce/CMakeLists.txt b/mapreduce/CMakeLists.txt new file mode 100644 index 0000000000..ef64c4e308 --- /dev/null +++ b/mapreduce/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(yt) diff --git a/mapreduce/yt/CMakeLists.txt b/mapreduce/yt/CMakeLists.txt new file mode 100644 index 0000000000..38e1c6410c --- /dev/null +++ b/mapreduce/yt/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(interface) diff --git a/mapreduce/yt/interface/CMakeLists.txt b/mapreduce/yt/interface/CMakeLists.txt new file mode 100644 index 0000000000..6d580ae9ad --- /dev/null +++ b/mapreduce/yt/interface/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(protos) diff --git a/mapreduce/yt/interface/protos/CMakeLists.darwin-x86_64.txt b/mapreduce/yt/interface/protos/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..e0f89c3a9a --- /dev/null +++ b/mapreduce/yt/interface/protos/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yt-interface-protos) +target_link_libraries(yt-interface-protos PUBLIC + contrib-libs-cxxsupp + yutil + yt_proto-yt-formats + contrib-libs-protobuf +) +target_proto_messages(yt-interface-protos PRIVATE + ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/extension.proto + ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/yamr.proto +) +target_proto_addincls(yt-interface-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yt-interface-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/mapreduce/yt/interface/protos/CMakeLists.linux-aarch64.txt b/mapreduce/yt/interface/protos/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..20741a6631 --- /dev/null +++ b/mapreduce/yt/interface/protos/CMakeLists.linux-aarch64.txt @@ -0,0 +1,58 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yt-interface-protos) +target_link_libraries(yt-interface-protos PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yt_proto-yt-formats + contrib-libs-protobuf +) +target_proto_messages(yt-interface-protos PRIVATE + ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/extension.proto + ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/yamr.proto +) +target_proto_addincls(yt-interface-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yt-interface-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/mapreduce/yt/interface/protos/CMakeLists.linux-x86_64.txt b/mapreduce/yt/interface/protos/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..20741a6631 --- /dev/null +++ b/mapreduce/yt/interface/protos/CMakeLists.linux-x86_64.txt @@ -0,0 +1,58 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yt-interface-protos) +target_link_libraries(yt-interface-protos PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yt_proto-yt-formats + contrib-libs-protobuf +) +target_proto_messages(yt-interface-protos PRIVATE + ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/extension.proto + ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/yamr.proto +) +target_proto_addincls(yt-interface-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yt-interface-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/mapreduce/yt/interface/protos/CMakeLists.txt b/mapreduce/yt/interface/protos/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/mapreduce/yt/interface/protos/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/mapreduce/yt/interface/protos/CMakeLists.windows-x86_64.txt b/mapreduce/yt/interface/protos/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..e0f89c3a9a --- /dev/null +++ b/mapreduce/yt/interface/protos/CMakeLists.windows-x86_64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yt-interface-protos) +target_link_libraries(yt-interface-protos PUBLIC + contrib-libs-cxxsupp + yutil + yt_proto-yt-formats + contrib-libs-protobuf +) +target_proto_messages(yt-interface-protos PRIVATE + ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/extension.proto + ${CMAKE_SOURCE_DIR}/mapreduce/yt/interface/protos/yamr.proto +) +target_proto_addincls(yt-interface-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yt-interface-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/ydb/library/yql/public/CMakeLists.txt b/ydb/library/yql/public/CMakeLists.txt index 823731a6d8..812b435b79 100644 --- a/ydb/library/yql/public/CMakeLists.txt +++ b/ydb/library/yql/public/CMakeLists.txt @@ -9,5 +9,6 @@ add_subdirectory(decimal) add_subdirectory(fastcheck) add_subdirectory(issue) +add_subdirectory(purecalc) add_subdirectory(types) add_subdirectory(udf) diff --git a/ydb/library/yql/public/purecalc/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..a417180394 --- /dev/null +++ b/ydb/library/yql/public/purecalc/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,27 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(common) +add_subdirectory(examples) +add_subdirectory(helpers) +add_subdirectory(io_specs) +add_subdirectory(ut) + +add_library(yql-public-purecalc) +target_compile_options(yql-public-purecalc PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(yql-public-purecalc PUBLIC + contrib-libs-cxxsupp + yutil + udf-service-exception_policy + public-purecalc-common +) +target_sources(yql-public-purecalc PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/purecalc.cpp +) diff --git a/ydb/library/yql/public/purecalc/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..7d72508c1f --- /dev/null +++ b/ydb/library/yql/public/purecalc/CMakeLists.linux-aarch64.txt @@ -0,0 +1,28 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(common) +add_subdirectory(examples) +add_subdirectory(helpers) +add_subdirectory(io_specs) +add_subdirectory(ut) + +add_library(yql-public-purecalc) +target_compile_options(yql-public-purecalc PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(yql-public-purecalc PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + udf-service-exception_policy + public-purecalc-common +) +target_sources(yql-public-purecalc PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/purecalc.cpp +) diff --git a/ydb/library/yql/public/purecalc/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..7d72508c1f --- /dev/null +++ b/ydb/library/yql/public/purecalc/CMakeLists.linux-x86_64.txt @@ -0,0 +1,28 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(common) +add_subdirectory(examples) +add_subdirectory(helpers) +add_subdirectory(io_specs) +add_subdirectory(ut) + +add_library(yql-public-purecalc) +target_compile_options(yql-public-purecalc PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(yql-public-purecalc PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + udf-service-exception_policy + public-purecalc-common +) +target_sources(yql-public-purecalc PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/purecalc.cpp +) diff --git a/ydb/library/yql/public/purecalc/CMakeLists.txt b/ydb/library/yql/public/purecalc/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..a417180394 --- /dev/null +++ b/ydb/library/yql/public/purecalc/CMakeLists.windows-x86_64.txt @@ -0,0 +1,27 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(common) +add_subdirectory(examples) +add_subdirectory(helpers) +add_subdirectory(io_specs) +add_subdirectory(ut) + +add_library(yql-public-purecalc) +target_compile_options(yql-public-purecalc PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(yql-public-purecalc PUBLIC + contrib-libs-cxxsupp + yutil + udf-service-exception_policy + public-purecalc-common +) +target_sources(yql-public-purecalc PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/purecalc.cpp +) diff --git a/ydb/library/yql/public/purecalc/common/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/common/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..301054c4de --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,65 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(public-purecalc-common) +target_compile_options(public-purecalc-common PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(public-purecalc-common PUBLIC + contrib-libs-cxxsupp + yutil + yql-sql-pg + library-yql-ast + yql-core-services + core-services-mounts + yql-core-user_data + minikql-comp_nodes-llvm + yql-utils-backtrace + yql-utils-log + library-yql-core + yql-core-type_ann + yql-parser-pg_wrapper + providers-common-codec + providers-common-comp_nodes + providers-common-mkql + providers-common-provider + common-schema-expr + providers-common-udf_resolve + tools-enum_parser-enum_serialization_runtime +) +target_sources(public-purecalc-common PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/compile_mkql.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/fwd.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/inspect_input.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/logger_init.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/names.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/processor_mode.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/program_factory.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp + 
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/type_from_schema.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker_factory.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/wrappers.cpp +) +generate_enum_serilization(public-purecalc-common + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.h + INCLUDE_HEADERS + ydb/library/yql/public/purecalc/common/interface.h +) diff --git a/ydb/library/yql/public/purecalc/common/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/common/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..b6d800550b --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/CMakeLists.linux-aarch64.txt @@ -0,0 +1,66 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(public-purecalc-common) +target_compile_options(public-purecalc-common PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(public-purecalc-common PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yql-sql-pg + library-yql-ast + yql-core-services + core-services-mounts + yql-core-user_data + minikql-comp_nodes-llvm + yql-utils-backtrace + yql-utils-log + library-yql-core + yql-core-type_ann + yql-parser-pg_wrapper + providers-common-codec + providers-common-comp_nodes + providers-common-mkql + providers-common-provider + common-schema-expr + providers-common-udf_resolve + tools-enum_parser-enum_serialization_runtime +) +target_sources(public-purecalc-common PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/compile_mkql.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/fwd.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/inspect_input.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/logger_init.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/names.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/processor_mode.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/program_factory.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp + 
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/type_from_schema.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker_factory.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/wrappers.cpp +) +generate_enum_serilization(public-purecalc-common + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.h + INCLUDE_HEADERS + ydb/library/yql/public/purecalc/common/interface.h +) diff --git a/ydb/library/yql/public/purecalc/common/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/common/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..b6d800550b --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/CMakeLists.linux-x86_64.txt @@ -0,0 +1,66 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(public-purecalc-common) +target_compile_options(public-purecalc-common PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(public-purecalc-common PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yql-sql-pg + library-yql-ast + yql-core-services + core-services-mounts + yql-core-user_data + minikql-comp_nodes-llvm + yql-utils-backtrace + yql-utils-log + library-yql-core + yql-core-type_ann + yql-parser-pg_wrapper + providers-common-codec + providers-common-comp_nodes + providers-common-mkql + providers-common-provider + common-schema-expr + providers-common-udf_resolve + tools-enum_parser-enum_serialization_runtime +) +target_sources(public-purecalc-common PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/compile_mkql.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/fwd.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/inspect_input.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/logger_init.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/names.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/processor_mode.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/program_factory.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp + 
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/type_from_schema.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker_factory.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/wrappers.cpp +) +generate_enum_serilization(public-purecalc-common + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.h + INCLUDE_HEADERS + ydb/library/yql/public/purecalc/common/interface.h +) diff --git a/ydb/library/yql/public/purecalc/common/CMakeLists.txt b/ydb/library/yql/public/purecalc/common/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/common/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/common/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..301054c4de --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/CMakeLists.windows-x86_64.txt @@ -0,0 +1,65 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(public-purecalc-common) +target_compile_options(public-purecalc-common PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(public-purecalc-common PUBLIC + contrib-libs-cxxsupp + yutil + yql-sql-pg + library-yql-ast + yql-core-services + core-services-mounts + yql-core-user_data + minikql-comp_nodes-llvm + yql-utils-backtrace + yql-utils-log + library-yql-core + yql-core-type_ann + yql-parser-pg_wrapper + providers-common-codec + providers-common-comp_nodes + providers-common-mkql + providers-common-provider + common-schema-expr + providers-common-udf_resolve + tools-enum_parser-enum_serialization_runtime +) +target_sources(public-purecalc-common PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/compile_mkql.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/fwd.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/inspect_input.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/logger_init.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/names.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/processor_mode.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/program_factory.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp + 
${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/type_from_schema.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/worker_factory.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/wrappers.cpp +) +generate_enum_serilization(public-purecalc-common + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/common/interface.h + INCLUDE_HEADERS + ydb/library/yql/public/purecalc/common/interface.h +) diff --git a/ydb/library/yql/public/purecalc/common/compile_mkql.cpp b/ydb/library/yql/public/purecalc/common/compile_mkql.cpp new file mode 100644 index 0000000000..7a9946890c --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/compile_mkql.cpp @@ -0,0 +1,115 @@ +#include "compile_mkql.h" + +#include <ydb/library/yql/providers/common/mkql/yql_provider_mkql.h> +#include <ydb/library/yql/providers/common/mkql/yql_type_mkql.h> +#include <ydb/library/yql/core/yql_user_data_storage.h> +#include <ydb/library/yql/public/purecalc/common/names.h> + +#include <util/stream/file.h> + +namespace NYql::NPureCalc { + +namespace { + +NCommon::IMkqlCallableCompiler::TCompiler MakeSelfCallableCompiler() { + return [](const TExprNode& node, NCommon::TMkqlBuildContext& ctx) { + MKQL_ENSURE(node.ChildrenSize() == 1, "Self takes exactly 1 argument"); + const auto* argument = node.Child(0); + MKQL_ENSURE(argument->IsAtom(), "Self argument must be atom"); + ui32 inputIndex = 0; + MKQL_ENSURE(TryFromString(argument->Content(), inputIndex), "Self argument must be UI32"); + auto type = NCommon::BuildType(node, *node.GetTypeAnn(), ctx.ProgramBuilder); + NKikimr::NMiniKQL::TCallableBuilder call(ctx.ProgramBuilder.GetTypeEnvironment(), node.Content(), type); + call.Add(ctx.ProgramBuilder.NewDataLiteral<ui32>(inputIndex)); + return NKikimr::NMiniKQL::TRuntimeNode(call.Build(), false); + }; +} + +NCommon::IMkqlCallableCompiler::TCompiler MakeFilePathCallableCompiler(const TUserDataTable& 
userData) { + return [&](const TExprNode& node, NCommon::TMkqlBuildContext& ctx) { + const TString name(node.Child(0)->Content()); + auto block = TUserDataStorage::FindUserDataBlock(userData, TUserDataKey::File(name)); + if (!block) { + auto blockKey = TUserDataKey::File(GetDefaultFilePrefix() + name); + block = TUserDataStorage::FindUserDataBlock(userData, blockKey); + } + MKQL_ENSURE(block, "file not found: " << name); + MKQL_ENSURE(block->Type == EUserDataType::PATH, + "FilePath not supported for non-filesystem user data, name: " + << name << ", block type: " << block->Type); + return ctx.ProgramBuilder.NewDataLiteral<NKikimr::NUdf::EDataSlot::String>(block->Data); + }; +} + +NCommon::IMkqlCallableCompiler::TCompiler MakeFileContentCallableCompiler(const TUserDataTable& userData) { + return [&](const TExprNode& node, NCommon::TMkqlBuildContext& ctx) { + const TString name(node.Child(0)->Content()); + auto block = TUserDataStorage::FindUserDataBlock(userData, TUserDataKey::File(name)); + if (!block) { + auto blockKey = TUserDataKey::File(GetDefaultFilePrefix() + name); + block = TUserDataStorage::FindUserDataBlock(userData, blockKey); + } + MKQL_ENSURE(block, "file not found: " << name); + if (block->Type == EUserDataType::PATH) { + auto content = TFileInput(block->Data).ReadAll(); + return ctx.ProgramBuilder.NewDataLiteral<NKikimr::NUdf::EDataSlot::String>(content); + } else if (block->Type == EUserDataType::RAW_INLINE_DATA) { + return ctx.ProgramBuilder.NewDataLiteral<NKikimr::NUdf::EDataSlot::String>(block->Data); + } else { + // TODO support EUserDataType::URL + MKQL_ENSURE(false, "user data blocks of type URL are not supported by FileContent: " << name); + Y_UNREACHABLE(); + } + }; +} + +NCommon::IMkqlCallableCompiler::TCompiler MakeFolderPathCallableCompiler(const TUserDataTable& userData) { + return [&](const TExprNode& node, NCommon::TMkqlBuildContext& ctx) { + const TString name(node.Child(0)->Content()); + auto folderName = 
TUserDataStorage::MakeFolderName(name); + TMaybe<TString> folderPath; + for (const auto& x : userData) { + if (!x.first.Alias().StartsWith(folderName)) { + continue; + } + + MKQL_ENSURE(x.second.Type == EUserDataType::PATH, + "FilePath not supported for non-file data block, name: " + << x.first.Alias() << ", block type: " << x.second.Type); + + auto pathPrefixLength = x.second.Data.size() - (x.first.Alias().size() - folderName.size()); + auto newFolderPath = x.second.Data.substr(0, pathPrefixLength); + if (!folderPath) { + folderPath = newFolderPath; + } else { + MKQL_ENSURE(*folderPath == newFolderPath, + "file " << x.second.Data << " is out of directory " << *folderPath); + } + } + return ctx.ProgramBuilder.NewDataLiteral<NKikimr::NUdf::EDataSlot::String>(*folderPath); + }; +} + +} + +NKikimr::NMiniKQL::TRuntimeNode CompileMkql(const TExprNode::TPtr& exprRoot, TExprContext& exprCtx, + const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, const NKikimr::NMiniKQL::TTypeEnvironment& env, const TUserDataTable& userData) +{ + NCommon::TMkqlCommonCallableCompiler compiler; + + compiler.AddCallable(PurecalcInputCallableName, MakeSelfCallableCompiler()); + compiler.OverrideCallable("FileContent", MakeFileContentCallableCompiler(userData)); + compiler.OverrideCallable("FilePath", MakeFilePathCallableCompiler(userData)); + compiler.OverrideCallable("FolderPath", MakeFolderPathCallableCompiler(userData)); + + // Prepare build context + + NKikimr::NMiniKQL::TProgramBuilder pgmBuilder(env, funcRegistry); + NCommon::TMkqlBuildContext buildCtx(compiler, pgmBuilder, exprCtx); + + // Build the root MKQL node + + return NCommon::MkqlBuildExpr(*exprRoot, buildCtx); +} + +} // NYql::NPureCalc diff --git a/ydb/library/yql/public/purecalc/common/compile_mkql.h b/ydb/library/yql/public/purecalc/common/compile_mkql.h new file mode 100644 index 0000000000..566459d396 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/compile_mkql.h @@ -0,0 +1,17 @@ +#pragma once + +#include 
<ydb/library/yql/public/purecalc/common/interface.h> +#include <ydb/library/yql/minikql/mkql_node.h> +#include <ydb/library/yql/ast/yql_expr.h> +#include <ydb/library/yql/core/yql_user_data.h> + +namespace NYql { + namespace NPureCalc { + /** + * Compile expr to mkql byte-code + */ + + NKikimr::NMiniKQL::TRuntimeNode CompileMkql(const TExprNode::TPtr& exprRoot, TExprContext& exprCtx, + const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, const NKikimr::NMiniKQL::TTypeEnvironment& env, const TUserDataTable& userData); + } +} diff --git a/ydb/library/yql/public/purecalc/common/fwd.cpp b/ydb/library/yql/public/purecalc/common/fwd.cpp new file mode 100644 index 0000000000..4214b6df83 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/fwd.cpp @@ -0,0 +1 @@ +#include "fwd.h" diff --git a/ydb/library/yql/public/purecalc/common/fwd.h b/ydb/library/yql/public/purecalc/common/fwd.h new file mode 100644 index 0000000000..22df90a6b2 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/fwd.h @@ -0,0 +1,56 @@ +#pragma once + +#include <util/generic/fwd.h> +#include <memory> + +namespace NYql::NPureCalc { + class TCompileError; + + template <typename> + class IConsumer; + + template <typename> + class IStream; + + class IProgramFactory; + + class IWorkerFactory; + + class IPullStreamWorkerFactory; + + class IPullListWorkerFactory; + + class IPushStreamWorkerFactory; + + class IWorker; + + class IPullStreamWorker; + + class IPullListWorker; + + class IPushStreamWorker; + + class TInputSpecBase; + + class TOutputSpecBase; + + class IProgram; + + template <typename, typename, typename> + class TProgramCommon; + + template <typename, typename> + class TPullStreamProgram; + + template <typename, typename> + class TPullListProgram; + + template <typename, typename> + class TPushStreamProgram; + + using IProgramFactoryPtr = TIntrusivePtr<IProgramFactory>; + using IWorkerFactoryPtr = std::shared_ptr<IWorkerFactory>; + using IPullStreamWorkerFactoryPtr = 
std::shared_ptr<IPullStreamWorkerFactory>; + using IPullListWorkerFactoryPtr = std::shared_ptr<IPullListWorkerFactory>; + using IPushStreamWorkerFactoryPtr = std::shared_ptr<IPushStreamWorkerFactory>; +} diff --git a/ydb/library/yql/public/purecalc/common/inspect_input.cpp b/ydb/library/yql/public/purecalc/common/inspect_input.cpp new file mode 100644 index 0000000000..c8fbb6cd58 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/inspect_input.cpp @@ -0,0 +1,33 @@ +#include "inspect_input.h" + +#include <ydb/library/yql/core/yql_expr_type_annotation.h> + +namespace NYql::NPureCalc { + bool TryFetchInputIndexFromSelf(const TExprNode& node, TExprContext& ctx, ui32 inputsCount, ui32& result) { + TIssueScopeGuard issueSope(ctx.IssueManager, [&]() { + return MakeIntrusive<TIssue>(ctx.GetPosition(node.Pos()), TStringBuilder() << "At function: " << node.Content()); + }); + + if (!EnsureArgsCount(node, 1, ctx)) { + return false; + } + + if (!EnsureAtom(*node.Child(0), ctx)) { + return false; + } + + if (!TryFromString(node.Child(0)->Content(), result)) { + auto message = TStringBuilder() << "Index " << TString{node.Child(0)->Content()}.Quote() << " isn't UI32"; + ctx.AddError(TIssue(ctx.GetPosition(node.Child(0)->Pos()), std::move(message))); + return false; + } + + if (result >= inputsCount) { + auto message = TStringBuilder() << "Invalid input index: " << result << " is out of range [0;" << inputsCount << ")"; + ctx.AddError(TIssue(ctx.GetPosition(node.Child(0)->Pos()), std::move(message))); + return false; + } + + return true; + } +} diff --git a/ydb/library/yql/public/purecalc/common/inspect_input.h b/ydb/library/yql/public/purecalc/common/inspect_input.h new file mode 100644 index 0000000000..fbc4413227 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/inspect_input.h @@ -0,0 +1,7 @@ +#pragma once + +#include <ydb/library/yql/ast/yql_expr.h> + +namespace NYql::NPureCalc { + bool TryFetchInputIndexFromSelf(const TExprNode&, TExprContext&, ui32, ui32&); 
+} diff --git a/ydb/library/yql/public/purecalc/common/interface.cpp b/ydb/library/yql/public/purecalc/common/interface.cpp new file mode 100644 index 0000000000..b22c65e482 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/interface.cpp @@ -0,0 +1,116 @@ +#include "interface.h" + +#include <ydb/library/yql/providers/common/codec/yql_codec_type_flags.h> +#include <ydb/library/yql/public/purecalc/common/logger_init.h> +#include <ydb/library/yql/public/purecalc/common/program_factory.h> + +using namespace NYql; +using namespace NYql::NPureCalc; + +TLoggingOptions::TLoggingOptions() + : LogLevel_(ELogPriority::TLOG_ERR) + , LogDestination(&Clog) +{ +} + +TLoggingOptions& TLoggingOptions::SetLogLevel(ELogPriority logLevel) { + LogLevel_ = logLevel; + return *this; +} + +TLoggingOptions& TLoggingOptions::SetLogDestination(IOutputStream* logDestination) { + LogDestination = logDestination; + return *this; +} + +TProgramFactoryOptions::TProgramFactoryOptions() + : UdfsDir_("") + , UserData_() + , LLVMSettings("OFF") + , CountersProvider(nullptr) + , NativeYtTypeFlags(0) + , UseSystemColumns(false) + , UseWorkerPool(true) +{ +} + +TProgramFactoryOptions& TProgramFactoryOptions::SetUDFsDir(TStringBuf dir) { + UdfsDir_ = dir; + return *this; +} + +TProgramFactoryOptions& TProgramFactoryOptions::AddLibrary(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content) { + auto& ref = UserData_.emplace_back(); + + ref.Type_ = NUserData::EType::LIBRARY; + ref.Disposition_ = disposition; + ref.Name_ = name; + ref.Content_ = content; + + return *this; +} + +TProgramFactoryOptions& TProgramFactoryOptions::AddFile(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content) { + auto& ref = UserData_.emplace_back(); + + ref.Type_ = NUserData::EType::FILE; + ref.Disposition_ = disposition; + ref.Name_ = name; + ref.Content_ = content; + + return *this; +} + +TProgramFactoryOptions& TProgramFactoryOptions::AddUDF(NUserData::EDisposition disposition, 
TStringBuf name, TStringBuf content) { + auto& ref = UserData_.emplace_back(); + + ref.Type_ = NUserData::EType::UDF; + ref.Disposition_ = disposition; + ref.Name_ = name; + ref.Content_ = content; + + return *this; +} + +TProgramFactoryOptions& TProgramFactoryOptions::SetLLVMSettings(TStringBuf llvm_settings) { + LLVMSettings = llvm_settings; + return *this; +} + +TProgramFactoryOptions& TProgramFactoryOptions::SetCountersProvider(NKikimr::NUdf::ICountersProvider* countersProvider) { + CountersProvider = countersProvider; + return *this; +} + +TProgramFactoryOptions& TProgramFactoryOptions::SetUseNativeYtTypes(bool useNativeTypes) { + NativeYtTypeFlags = useNativeTypes ? NTCF_PRODUCTION : NTCF_NONE; + return *this; +} + +TProgramFactoryOptions& TProgramFactoryOptions::SetNativeYtTypeFlags(ui64 nativeTypeFlags) { + NativeYtTypeFlags = nativeTypeFlags; + return *this; +} + +TProgramFactoryOptions& TProgramFactoryOptions::SetDeterministicTimeProviderSeed(TMaybe<ui64> seed) { + DeterministicTimeProviderSeed = seed; + return *this; +} + +TProgramFactoryOptions& TProgramFactoryOptions::SetUseSystemColumns(bool useSystemColumns) { + UseSystemColumns = useSystemColumns; + return *this; +} + +TProgramFactoryOptions& TProgramFactoryOptions::SetUseWorkerPool(bool useWorkerPool) { + UseWorkerPool = useWorkerPool; + return *this; +} + +void NYql::NPureCalc::ConfigureLogging(const TLoggingOptions& options) { + InitLogging(options); +} + +IProgramFactoryPtr NYql::NPureCalc::MakeProgramFactory(const TProgramFactoryOptions& options) { + return new TProgramFactory(options); +} diff --git a/ydb/library/yql/public/purecalc/common/interface.h b/ydb/library/yql/public/purecalc/common/interface.h new file mode 100644 index 0000000000..49bfd8c22a --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/interface.h @@ -0,0 +1,1137 @@ +#pragma once + +#include "fwd.h" +#include "wrappers.h" + +#include <ydb/library/yql/core/user_data/yql_user_data.h> + +#include 
<ydb/library/yql/public/udf/udf_value.h> +#include <ydb/library/yql/public/udf/udf_counter.h> +#include <ydb/library/yql/public/udf/udf_registrator.h> + +#include <ydb/library/yql/public/issue/yql_issue.h> +#include <library/cpp/yson/node/node.h> + +#include <library/cpp/logger/priority.h> + +#include <util/generic/ptr.h> +#include <util/generic/maybe.h> +#include <util/generic/hash_set.h> +#include <util/generic/string.h> +#include <util/stream/output.h> + +class ITimeProvider; + +namespace NKikimr { + namespace NMiniKQL { + class TScopedAlloc; + class IComputationGraph; + class IFunctionRegistry; + class TTypeEnvironment; + class TType; + class TStructType; + } +} + +namespace NYql { + namespace NPureCalc { + /** + * SQL or s-expression translation error. + */ + class TCompileError: public yexception { + private: + TString Yql_; + TString Issues_; + + public: + // TODO: maybe accept an actual list of issues here? + // See https://a.yandex-team.ru/arc/review/439403/details#comment-778237 + TCompileError(TString yql, TString issues) + : Yql_(std::move(yql)) + , Issues_(std::move(issues)) + { + } + + public: + /** + * Get the sql query which caused the error (if there is one available). + */ + const TString& GetYql() const { + return Yql_; + } + + /** + * Get detailed description for all errors and warnings that happened during sql translation. + */ + const TString& GetIssues() const { + return Issues_; + } + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * A generic input stream of objects. + */ + template <typename T> + class IStream { + public: + virtual ~IStream() = default; + + public: + /** + * Pops and returns the next value in the stream. If the stream is finished, should return some sentinel object. + * + * Depending on return type, this function may not transfer object ownership to a user. + * Thus, the stream may manage the returned object itself. 
+ * That is, the returned object's lifetime may be bound to the input stream lifetime; it may be destroyed + * upon calling Fetch() or upon destroying the stream, whichever happens first. + */ + virtual T Fetch() = 0; + }; + + /** + * Create a new stream which applies the given functor to the elements of the original stream. + */ + template <typename TOld, typename TNew, typename TFunctor> + inline THolder<IStream<TNew>> MapStream(THolder<IStream<TOld>> stream, TFunctor functor) { + return THolder(new NPrivate::TMappingStream<TNew, TOld, TFunctor>(std::move(stream), std::move(functor))); + }; + + /** + * Convert stream of objects into a stream of potentially incompatible objects. + * + * This conversion applies static cast to the output of the original stream. Use with caution! + */ + /// @{ + template < + typename TNew, typename TOld, + std::enable_if_t<!std::is_same<TNew, TOld>::value>* = nullptr> + inline THolder<IStream<TNew>> ConvertStreamUnsafe(THolder<IStream<TOld>> stream) { + return MapStream<TOld, TNew>(std::move(stream), [](TOld x) -> TNew { return static_cast<TNew>(x); }); + } + template <typename T> + inline THolder<IStream<T>> ConvertStreamUnsafe(THolder<IStream<T>> stream) { + return stream; + } + /// @} + + /** + * Convert stream of objects into a stream of compatible objects. + * + * Note: each conversion adds one level of indirection so avoid them if possible. + */ + template <typename TNew, typename TOld, std::enable_if_t<std::is_convertible<TOld, TNew>::value>* = nullptr> + inline THolder<IStream<TNew>> ConvertStream(THolder<IStream<TOld>> stream) { + return ConvertStreamUnsafe<TNew, TOld>(std::move(stream)); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * A generic push consumer. + */ + template <typename T> + class IConsumer { + public: + virtual ~IConsumer() = default; + + public: + /** + * Feed an object to consumer. 
+ * + * Depending on argument type, the consumer may not take ownership of the passed object; + * in that case it is the caller's responsibility to manage the object lifetime after passing it to this method. + * + * The passed object can be destroyed after the consumer returns from this function; the consumer should + * not store pointer to the passed object or the passed object itself without taking all necessary precautions + * to ensure that the pointer or the object stays valid after returning. + */ + virtual void OnObject(T) = 0; + + /** + * Close the consumer and run finalization logic. Calling OnObject after calling this function is an error. + */ + virtual void OnFinish() = 0; + }; + + /** + * Create a new consumer which applies the given functor to objects before passing them to the original consumer. + */ + template <typename TOld, typename TNew, typename TFunctor> + inline THolder<IConsumer<TNew>> MapConsumer(THolder<IConsumer<TOld>> stream, TFunctor functor) { + return THolder(new NPrivate::TMappingConsumer<TNew, TOld, TFunctor>(std::move(stream), std::move(functor))); + }; + + + /** + * Convert consumer of objects into a consumer of potentially incompatible objects. + * + * This conversion applies static cast to the input value. Use with caution. + */ + /// @{ + template < + typename TNew, typename TOld, + std::enable_if_t<!std::is_same<TNew, TOld>::value>* = nullptr> + inline THolder<IConsumer<TNew>> ConvertConsumerUnsafe(THolder<IConsumer<TOld>> consumer) { + return MapConsumer<TOld, TNew>(std::move(consumer), [](TNew x) -> TOld { return static_cast<TOld>(x); }); + } + template <typename T> + inline THolder<IConsumer<T>> ConvertConsumerUnsafe(THolder<IConsumer<T>> consumer) { + return consumer; + } + /// @} + + /** + * Convert consumer of objects into a consumer of compatible objects. + * + * Note: each conversion adds one level of indirection so avoid them if possible.
+ */ + template <typename TNew, typename TOld, std::enable_if_t<std::is_convertible<TNew, TOld>::value>* = nullptr> + inline THolder<IConsumer<TNew>> ConvertConsumer(THolder<IConsumer<TOld>> consumer) { + return ConvertConsumerUnsafe<TNew, TOld>(std::move(consumer)); + } + + /** + * Create a consumer which holds a non-owning pointer to the given consumer + * and passes all messages to the latter. + */ + template <typename T, typename C> + THolder<NPrivate::TNonOwningConsumer<T, C>> MakeNonOwningConsumer(C consumer) { + return MakeHolder<NPrivate::TNonOwningConsumer<T, C>>(consumer); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Logging options. + */ + struct TLoggingOptions final { + public: + /// Logging level for messages generated during compilation. + ELogPriority LogLevel_; // TODO: rename to LogLevel + + /// Where to write log messages. + IOutputStream* LogDestination; + + public: + TLoggingOptions(); + /** + * Set a new logging level. + * + * @return reference to self, to allow method chaining. + */ + TLoggingOptions& SetLogLevel(ELogPriority); + + /** + * Set a new logging destination. + * + * @return reference to self, to allow method chaining. + */ + TLoggingOptions& SetLogDestination(IOutputStream*); + }; + + /** + * General options for program factory. + */ + struct TProgramFactoryOptions final { + public: + /// Path to a directory with compiled UDFs. Leave empty to disable loading external UDFs. + TString UdfsDir_; // TODO: rename to UDFDir + + /// List of available external resources, e.g. files, UDFs, libraries. + TVector<NUserData::TUserData> UserData_; // TODO: rename to UserData + + /// LLVM settings. Assign "OFF" to disable LLVM, empty string for default settings. + TString LLVMSettings; + + /// Provider for generic counters which can be used to export statistics from UDFs. 
+ NKikimr::NUdf::ICountersProvider* CountersProvider; + + /// YT Type V3 flags for Skiff/Yson serialization. + ui64 NativeYtTypeFlags; + + /// Seed for deterministic time provider + TMaybe<ui64> DeterministicTimeProviderSeed; + + /// Use special system columns to support tables naming (supports non empty ``TablePath()``/``TableName()``) + bool UseSystemColumns; + + /// Reuse allocated workers + bool UseWorkerPool; + + public: + TProgramFactoryOptions(); + + public: + /** + * Set a new path to a directory with UDFs. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetUDFsDir(TStringBuf); + + /** + * Add a new library to the UserData list. + * + * @param disposition where the resource resides, e.g. on filesystem, in memory, etc. + * NB: URL disposition is not supported. + * @param name name of the resource. + * @param content depending on disposition, either path to the resource or its content. + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& AddLibrary(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content); + + /** + * Add a new file to the UserData list. + * + * @param disposition where the resource resides, e.g. on filesystem, in memory, etc. + * NB: URL disposition is not supported. + * @param name name of the resource. + * @param content depending on disposition, either path to the resource or its content. + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& AddFile(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content); + + /** + * Add a new UDF to the UserData list. + * + * @param disposition where the resource resides, e.g. on filesystem, in memory, etc. + * NB: URL disposition is not supported. + * @param name name of the resource. + * @param content depending on disposition, either path to the resource or its content. + * @return reference to self, to allow method chaining. 
+ */ + TProgramFactoryOptions& AddUDF(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content); + + /** + * Set new LLVM settings. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetLLVMSettings(TStringBuf llvm_settings); + + /** + * Set new counters provider. Passed pointer should stay alive for as long as the processor factory + * stays alive. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetCountersProvider(NKikimr::NUdf::ICountersProvider* countersProvider); + + /** + * Set new YT Type V3 mode. Deprecated method. Use SetNativeYtTypeFlags instead + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetUseNativeYtTypes(bool useNativeTypes); + + /** + * Set YT Type V3 flags. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetNativeYtTypeFlags(ui64 nativeTypeFlags); + + /** + * Set seed for deterministic time provider. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetDeterministicTimeProviderSeed(TMaybe<ui64> seed); + + /** + * Set new flag whether to allow using system columns or not. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetUseSystemColumns(bool useSystemColumns); + + /** + * Set new flag whether to allow reusing workers or not. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetUseWorkerPool(bool useWorkerPool); + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * What exactly are we parsing: SQL or an s-expression. + */ + enum class ETranslationMode { + SQL /* "SQL" */, + SExpr /* "s-expression" */, + Mkql /* "mkql" */ + }; + + /** + * A facility for compiling sql and s-expressions and making programs from them. 
+ */ + class IProgramFactory: public TThrRefBase { + protected: + virtual IPullStreamWorkerFactoryPtr MakePullStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) = 0; + virtual IPullListWorkerFactoryPtr MakePullListWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) = 0; + virtual IPushStreamWorkerFactoryPtr MakePushStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) = 0; + + public: + /** + * Add new udf module. It's not specified whether adding new modules will affect existing programs + * (theoretical answer is 'no'). + */ + virtual void AddUdfModule(const TStringBuf&, NKikimr::NUdf::TUniquePtr<NKikimr::NUdf::IUdfModule>&&) = 0; + // TODO: support setting udf modules via factory options. + + /** + * Set new counters provider, override one that was specified via factory options. Note that existing + * programs will still reference the previous provider. + */ + virtual void SetCountersProvider(NKikimr::NUdf::ICountersProvider*) = 0; + // TODO: support setting providers via factory options. 
+ + template <typename TInputSpec, typename TOutputSpec> + THolder<TPullStreamProgram<TInputSpec, TOutputSpec>> MakePullStreamProgram( + TInputSpec inputSpec, TOutputSpec outputSpec, TString query, ETranslationMode mode = ETranslationMode::SQL, ui16 syntaxVersion = 1 + ) { + auto workerFactory = MakePullStreamWorkerFactory(inputSpec, outputSpec, std::move(query), mode, syntaxVersion); + return MakeHolder<TPullStreamProgram<TInputSpec, TOutputSpec>>(std::move(inputSpec), std::move(outputSpec), workerFactory); + } + + template <typename TInputSpec, typename TOutputSpec> + THolder<TPullListProgram<TInputSpec, TOutputSpec>> MakePullListProgram( + TInputSpec inputSpec, TOutputSpec outputSpec, TString query, ETranslationMode mode = ETranslationMode::SQL, ui16 syntaxVersion = 1 + ) { + auto workerFactory = MakePullListWorkerFactory(inputSpec, outputSpec, std::move(query), mode, syntaxVersion); + return MakeHolder<TPullListProgram<TInputSpec, TOutputSpec>>(std::move(inputSpec), std::move(outputSpec), workerFactory); + } + + template <typename TInputSpec, typename TOutputSpec> + THolder<TPushStreamProgram<TInputSpec, TOutputSpec>> MakePushStreamProgram( + TInputSpec inputSpec, TOutputSpec outputSpec, TString query, ETranslationMode mode = ETranslationMode::SQL, ui16 syntaxVersion = 1 + ) { + auto workerFactory = MakePushStreamWorkerFactory(inputSpec, outputSpec, std::move(query), mode, syntaxVersion); + return MakeHolder<TPushStreamProgram<TInputSpec, TOutputSpec>>(std::move(inputSpec), std::move(outputSpec), workerFactory); + } + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * A facility for creating workers. Despite being a part of a public API, worker factory is not used directly. + */ + class IWorkerFactory: public std::enable_shared_from_this<IWorkerFactory> { + public: + virtual ~IWorkerFactory() = default; + /** + * Get input column names for specified input that are actually used in the query. 
+ */ + virtual const THashSet<TString>& GetUsedColumns(ui32) const = 0; + /** + * Overload for single-input programs. + */ + virtual const THashSet<TString>& GetUsedColumns() const = 0; + + /** + * Make input type schema for specified input as deduced by program optimizer. This schema is equivalent + * to one provided by input spec up to the order of the fields in structures. + */ + virtual NYT::TNode MakeInputSchema(ui32) const = 0; + /** + * Overload for single-input programs. + */ + virtual NYT::TNode MakeInputSchema() const = 0; + + /** + * Make output type schema as deduced by program optimizer. If output spec provides its own schema, than + * this schema is equivalent to one provided by output spec up to the order of the fields in structures. + */ + /// @{ + /** + * Overload for single-table output programs (i.e. output type is struct). + */ + virtual NYT::TNode MakeOutputSchema() const = 0; + /** + * Overload for multi-table output programs (i.e. output type is variant over tuple). + */ + virtual NYT::TNode MakeOutputSchema(ui32) const = 0; + /** + * Overload for multi-table output programs (i.e. output type is variant over struct). + */ + virtual NYT::TNode MakeOutputSchema(TStringBuf) const = 0; + /// @} + + /** + * Make full output schema. For single-output programs returns struct type, for multi-output programs + * returns variant type. + * + * Warning: calling this function may result in extended memory usage for large number of output tables. 
+ */ + virtual NYT::TNode MakeFullOutputSchema() const = 0; + + /** + * Get compilation issues + */ + virtual TIssues GetIssues() const = 0; + + /** + * Get precompiled mkql program + */ + virtual TString GetCompiledProgram() = 0; + + /** + * Return a worker to the factory for possible reuse + */ + virtual void ReturnWorker(IWorker* worker) = 0; + }; + + class TReleaseWorker { + public: + template <class T> + static inline void Destroy(T* t) noexcept { + t->Release(); + } + }; + + template <class T> + using TWorkerHolder = THolder<T, TReleaseWorker>; + + /** + * Factory for generating pull stream workers. + */ + class IPullStreamWorkerFactory: public IWorkerFactory { + public: + /** + * Create a new pull stream worker. + */ + virtual TWorkerHolder<IPullStreamWorker> MakeWorker() = 0; + }; + + /** + * Factory for generating pull list workers. + */ + class IPullListWorkerFactory: public IWorkerFactory { + public: + /** + * Create a new pull list worker. + */ + virtual TWorkerHolder<IPullListWorker> MakeWorker() = 0; + }; + + /** + * Factory for generating push stream workers. + */ + class IPushStreamWorkerFactory: public IWorkerFactory { + public: + /** + * Create a new push stream worker. + */ + virtual TWorkerHolder<IPushStreamWorker> MakeWorker() = 0; + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Worker is a central part of any program instance. It contains current computation state + * (called computation graph) and objects required to work with it, including an allocator for unboxed values. + * + * Usually, users do not interact with workers directly. They use program instance entry points such as streams + * and consumers instead. The only case when one would have to to interact with workers is when implementing + * custom io-specification. 
+ */ + class IWorker { + protected: + friend class TReleaseWorker; + /** + * Cleanup the worker and return to a worker factory for reuse + */ + virtual void Release() = 0; + + public: + virtual ~IWorker() = default; + + public: + /** + * Number of inputs for this program. + */ + virtual ui32 GetInputsCount() const = 0; + + /** + * MiniKQL input struct type of specified input for this program. Type is equivalent to the deduced input + * schema (see IWorker::MakeInputSchema()) + * + * If ``original`` is set to ``true``, returns type without virtual system columns. + */ + virtual const NKikimr::NMiniKQL::TStructType* GetInputType(ui32, bool original = false) const = 0; + /** + * Overload for single-input programs. + */ + virtual const NKikimr::NMiniKQL::TStructType* GetInputType(bool original = false) const = 0; + + /** + * MiniKQL output struct type for this program. The returned type is equivalent to the deduced output + * schema (see IWorker::MakeFullOutputSchema()). + */ + virtual const NKikimr::NMiniKQL::TType* GetOutputType() const = 0; + + /** + * Make input type schema for specified input as deduced by program optimizer. This schema is equivalent + * to one provided by input spec up to the order of the fields in structures. + */ + virtual NYT::TNode MakeInputSchema(ui32) const = 0; + /** + * Overload for single-input programs. + */ + virtual NYT::TNode MakeInputSchema() const = 0; + + /** + * Make output type schema as deduced by program optimizer. If output spec provides its own schema, than + * this schema is equivalent to one provided by output spec up to the order of the fields in structures. + */ + /// @{ + /** + * Overload for single-table output programs (i.e. output type is struct). + */ + virtual NYT::TNode MakeOutputSchema() const = 0; + /** + * Overload for multi-table output programs (i.e. output type is variant over tuple). + */ + virtual NYT::TNode MakeOutputSchema(ui32) const = 0; + /** + * Overload for multi-table output programs (i.e. 
output type is variant over struct). + */ + virtual NYT::TNode MakeOutputSchema(TStringBuf) const = 0; + /// @} + + /** + * Generates full output schema. For single-output programs returns struct type, for multi-output programs + * returns variant type. + * + * Warning: calling this function may result in extended memory usage for large number of output tables. + */ + virtual NYT::TNode MakeFullOutputSchema() const = 0; + + /** + * Get scoped alloc used in this worker. + */ + virtual NKikimr::NMiniKQL::TScopedAlloc& GetScopedAlloc() = 0; + + /** + * Get computation graph. + */ + virtual NKikimr::NMiniKQL::IComputationGraph& GetGraph() = 0; + + /** + * Get function registry for this worker. + */ + virtual const NKikimr::NMiniKQL::IFunctionRegistry& GetFunctionRegistry() const = 0; + + /** + * Get type environment for this worker. + */ + virtual NKikimr::NMiniKQL::TTypeEnvironment& GetTypeEnvironment() = 0; + + /** + * Get llvm settings for this worker. + */ + virtual const TString& GetLLVMSettings() const = 0; + + /** + * Get YT Type V3 flags + */ + virtual ui64 GetNativeYtTypeFlags() const = 0; + + /** + * Get time provider + */ + virtual ITimeProvider* GetTimeProvider() const = 0; + }; + + /** + * Worker which operates in pull stream mode. + */ + class IPullStreamWorker: public IWorker { + public: + /** + * Set input computation graph node for specified input. The passed unboxed value should be a stream of + * structs. It should be created via the allocator associated with this very worker. + * This function can only be called once for each input. + */ + virtual void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) = 0; + + /** + * Get the output computation graph node. The returned node will be a stream of structs or variants. + * This function cannot be called before setting an input value. + */ + virtual NKikimr::NUdf::TUnboxedValue& GetOutput() = 0; + }; + + /** + * Worker which operates in pull list mode. 
+ */ + class IPullListWorker: public IWorker { + public: + /** + * Set input computation graph node for specified input. The passed unboxed value should be a list of + * structs. It should be created via the allocator associated with this very worker. + * This function can only be called once for each index. + */ + virtual void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) = 0; + + /** + * Get the output computation graph node. The returned node will be a list of structs or variants. + * This function cannot be called before setting an input value. + */ + virtual NKikimr::NUdf::TUnboxedValue& GetOutput() = 0; + + /** + * Get iterator over the output list. + */ + virtual NKikimr::NUdf::TUnboxedValue& GetOutputIterator() = 0; + + /** + * Reset iterator to the beginning of the output list. After calling this function, GetOutputIterator() + * will return a fresh iterator; all previously returned iterators will become invalid. + */ + virtual void ResetOutputIterator() = 0; + }; + + /** + * Worker which operates in push stream mode. + */ + class IPushStreamWorker: public IWorker { + public: + /** + * Set a consumer where the worker will relay its output. This function can only be called once. + */ + virtual void SetConsumer(THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>>) = 0; + + /** + * Push new value to the graph, then feed all new output to the consumer. Values cannot be pushed before + * assigning a consumer. + */ + virtual void Push(NKikimr::NUdf::TUnboxedValue&&) = 0; + + /** + * Send finish event and clear the computation graph. No new values will be accepted. + */ + virtual void OnFinish() = 0; + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Input specifications describe format for program input.
They carry information about input data schema + * as well as the knowledge about how to convert input structures into unboxed values (data format which can be + * processed by the YQL runtime). + * + * Input spec defines the arguments of the program's Apply method. For example, a program + * with the protobuf input spec will accept a stream of protobuf messages while a program with the + * yson spec will accept an input stream (binary or text one). + * + * See documentation for input and output spec traits for hints on how to implement custom specs. + */ + class TInputSpecBase { + protected: + mutable TVector<THashMap<TString, NYT::TNode>> AllVirtualColumns_; + + public: + virtual ~TInputSpecBase() = default; + + public: + /** + * Get input data schemas in YQL format (NB: not a YT format). Each item of the returned vector must + * describe a structure. + * + * Format of each item is approximately this one: + * + * @code + * [ + * 'StructType', + * [ + * ["Field1Name", ["DataType", "Int32"]], + * ["Field2Name", ["DataType", "String"]], + * ... + * ] + * ] + * @endcode + */ + virtual const TVector<NYT::TNode>& GetSchemas() const = 0; + // TODO: make a neat schema builder + + /** + * Get virtual columns for each input. + * + * Key of each mapping is column name, value is data schema in YQL format. + */ + const TVector<THashMap<TString, NYT::TNode>>& GetAllVirtualColumns() const { + if (AllVirtualColumns_.empty()) { + AllVirtualColumns_ = TVector<THashMap<TString, NYT::TNode>>(GetSchemas().size()); + } + + return AllVirtualColumns_; + } + }; + + /** + * Output specifications describe format for program output. Like input specifications, they carry knowledge + * about program output type and how to convert unboxed values into that type. + */ + class TOutputSpecBase { + private: + TMaybe<THashSet<TString>> OutputColumnsFilter_; + + public: + virtual ~TOutputSpecBase() = default; + + public: + /** + * Get output data schema in YQL format (NB: not a YT format).
The returned value must describe a structure + * or a variant made of structures for multi-table outputs (note: not all specs support multi-table output). + * + * See docs for the input spec's GetSchemas(). + * + * Also TNode entity could be returned (NYT::TNode::CreateEntity()), + * in which case output schema would be inferred from query and could be + * obtained by Program::GetOutputSchema() call. + */ + virtual const NYT::TNode& GetSchema() const = 0; + + /** + * Get an output columns filter. + * + * Output columns filter is a set of column names that should be left in the output. All columns that are + * not in this set will not be calculated. Depending on the output schema, they will be either removed + * completely (for optional columns) or filled with defaults (for required columns). + */ + const TMaybe<THashSet<TString>>& GetOutputColumnsFilter() const { + return OutputColumnsFilter_; + } + + /** + * Set new output columns filter. + */ + void SetOutputColumnsFilter(const TMaybe<THashSet<TString>>& outputColumnsFilter) { + OutputColumnsFilter_ = outputColumnsFilter; + } + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Input spec traits provide information on how to process program input. + * + * Each input spec should create a template specialization for this class, in which it should provide several + * static variables and functions. + * + * For example, a hypothetical example of implementing a JSON input spec would look like this: + * + * @code + * class TJsonInputSpec: public TInputSpecBase { + * // whatever magic you require for this spec + * }; + * + * template <> + * class TInputSpecTraits<TJsonInputSpec> { + * // write here four constants, one typedef and three static functions described below + * }; + * @endcode + * + * @tparam T input spec type.
+ */ + template <typename T> + struct TInputSpecTraits { + /// Safety flag which should be set to false in all template specializations of this class. Attempt to + /// build a program using a spec with `IsPartial=true` will result in compilation error. + static const constexpr bool IsPartial = true; + + /// Indicates whether this spec supports pull stream mode. + static const constexpr bool SupportPullStreamMode = false; + /// Indicates whether this spec supports pull list mode. + static const constexpr bool SupportPullListMode = false; + /// Indicates whether this spec supports push stream mode. + static const constexpr bool SupportPushStreamMode = false; + + /// For push mode, indicates the return type of the builder's Process function. + using TConsumerType = void; + + /// For pull stream mode, should take an input spec, a pull stream worker and whatever the user passed + /// to the program's Apply function, create an unboxed values with a custom stream implementations + /// and pass it to the worker's SetInput function for each input. + template <typename ...A> + static void PreparePullStreamWorker(const T&, IPullStreamWorker*, A&&...) { + Y_UNREACHABLE(); + } + + /// For pull list mode, should take an input spec, a pull list worker and whatever the user passed + /// to the program's Apply function, create an unboxed values with a custom list implementations + /// and pass it to the worker's SetInput function for each input. + template <typename ...A> + static void PreparePullListWorker(const T&, IPullListWorker*, A&&...) { + Y_UNREACHABLE(); + } + + /// For push stream mode, should take an input spec and a worker and create a consumer which will + /// be returned to the user. The consumer should keep the worker alive until its own destruction. + /// The return type of this function should exactly match the one defined in ConsumerType typedef. 
+ static TConsumerType MakeConsumer(const T&, TWorkerHolder<IPushStreamWorker>) { + Y_UNREACHABLE(); + } + }; + + /** + * Output spec traits provide information on how to process program output. Like with input specs, each output + * spec requires an appropriate template specialization of this class. + * + * @tparam T output spec type. + */ + template <typename T> + struct TOutputSpecTraits { + /// Safety flag which should be set to false in all template specializations of this class. Attempt to + /// build a program using a spec with `IsPartial=true` will result in compilation error. + static const constexpr bool IsPartial = true; + + /// Indicates whether this spec supports pull stream mode. + static const constexpr bool SupportPullStreamMode = false; + /// Indicates whether this spec supports pull list mode. + static const constexpr bool SupportPullListMode = false; + /// Indicates whether this spec supports push stream mode. + static const constexpr bool SupportPushStreamMode = false; + + /// For pull stream mode, indicates the return type of the program's Apply function. + using TPullStreamReturnType = void; + + /// For pull list mode, indicates the return type of the program's Apply function. + using TPullListReturnType = void; + + /// For pull stream mode, should take an output spec and a worker and build a stream which will be returned + /// to the user. The return type of this function must match the one specified in the TPullStreamReturnType. + static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const T&, TWorkerHolder<IPullStreamWorker>) { + Y_UNREACHABLE(); + } + + /// For pull list mode, should take an output spec and a worker and build a list which will be returned + /// to the user. The return type of this function must match the one specified in the TPullListReturnType.
+ static TPullListReturnType ConvertPullListWorkerToOutputType(const T&, TWorkerHolder<IPullListWorker>) { + Y_UNREACHABLE(); + } + + /// For push stream mode, should take an output spec, a worker and whatever arguments the user passed + /// to the program's Apply function, create a consumer for unboxed values and pass it to the worker's + /// SetConsumer function. + template <typename ...A> + static void SetConsumerToWorker(const T&, IPushStreamWorker*, A&&...) { + Y_UNREACHABLE(); + } + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////// + +#define NOT_SPEC_MSG(spec_type) "passed class should be derived from " spec_type " spec base" +#define PARTIAL_SPEC_MSG(spec_type) "this " spec_type " spec does not define its traits. Make sure you've passed " \ + "an " spec_type " spec and not some other object; also make sure you've included " \ + "all necessary headers. If you're developing a spec, make sure you have " \ + "a spec traits template specialization" +#define UNSUPPORTED_MODE_MSG(spec_type, mode) "this " spec_type " spec does not support " mode " mode" + + class IProgram { + public: + virtual ~IProgram() = default; + + public: + virtual const TInputSpecBase& GetInputSpecBase() const = 0; + virtual const TOutputSpecBase& GetOutputSpecBase() const = 0; + virtual const THashSet<TString>& GetUsedColumns(ui32) const = 0; + virtual const THashSet<TString>& GetUsedColumns() const = 0; + virtual NYT::TNode MakeInputSchema(ui32) const = 0; + virtual NYT::TNode MakeInputSchema() const = 0; + virtual NYT::TNode MakeOutputSchema() const = 0; + virtual NYT::TNode MakeOutputSchema(ui32) const = 0; + virtual NYT::TNode MakeOutputSchema(TStringBuf) const = 0; + virtual NYT::TNode MakeFullOutputSchema() const = 0; + virtual TIssues GetIssues() const = 0; + virtual TString GetCompiledProgram() = 0; + + inline void MergeUsedColumns(THashSet<TString>& columns, ui32 inputIndex) { + const auto& usedColumns = 
GetUsedColumns(inputIndex); + columns.insert(usedColumns.begin(), usedColumns.end()); + } + + inline void MergeUsedColumns(THashSet<TString>& columns) { + const auto& usedColumns = GetUsedColumns(); + columns.insert(usedColumns.begin(), usedColumns.end()); + } + }; + + template <typename TInputSpec, typename TOutputSpec, typename WorkerFactory> + class TProgramCommon: public IProgram { + static_assert(std::is_base_of<TInputSpecBase, TInputSpec>::value, NOT_SPEC_MSG("input")); + static_assert(std::is_base_of<TOutputSpecBase, TOutputSpec>::value, NOT_SPEC_MSG("output")); + + protected: + TInputSpec InputSpec_; + TOutputSpec OutputSpec_; + std::shared_ptr<WorkerFactory> WorkerFactory_; + + public: + explicit TProgramCommon( + TInputSpec inputSpec, + TOutputSpec outputSpec, + std::shared_ptr<WorkerFactory> workerFactory + ) + : InputSpec_(inputSpec) + , OutputSpec_(outputSpec) + , WorkerFactory_(std::move(workerFactory)) + { + } + + public: + const TInputSpec& GetInputSpec() const { + return InputSpec_; + } + + const TOutputSpec& GetOutputSpec() const { + return OutputSpec_; + } + + const TInputSpecBase& GetInputSpecBase() const override { + return InputSpec_; + } + + const TOutputSpecBase& GetOutputSpecBase() const override { + return OutputSpec_; + } + + const THashSet<TString>& GetUsedColumns(ui32 inputIndex) const override { + return WorkerFactory_->GetUsedColumns(inputIndex); + } + + const THashSet<TString>& GetUsedColumns() const override { + return WorkerFactory_->GetUsedColumns(); + } + + NYT::TNode MakeInputSchema(ui32 inputIndex) const override { + return WorkerFactory_->MakeInputSchema(inputIndex); + } + + NYT::TNode MakeInputSchema() const override { + return WorkerFactory_->MakeInputSchema(); + } + + NYT::TNode MakeOutputSchema() const override { + return WorkerFactory_->MakeOutputSchema(); + } + + NYT::TNode MakeOutputSchema(ui32 outputIndex) const override { + return WorkerFactory_->MakeOutputSchema(outputIndex); + } + + NYT::TNode 
MakeOutputSchema(TStringBuf outputName) const override { + return WorkerFactory_->MakeOutputSchema(outputName); + } + + NYT::TNode MakeFullOutputSchema() const override { + return WorkerFactory_->MakeFullOutputSchema(); + } + + TIssues GetIssues() const override { + return WorkerFactory_->GetIssues(); + } + + TString GetCompiledProgram() override { + return WorkerFactory_->GetCompiledProgram(); + } + }; + + template <typename TInputSpec, typename TOutputSpec> + class TPullStreamProgram final: public TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory> { + using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::WorkerFactory_; + using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::InputSpec_; + using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::OutputSpec_; + + public: + using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::TProgramCommon; + + public: + template <typename ...T> + typename TOutputSpecTraits<TOutputSpec>::TPullStreamReturnType Apply(T&& ... 
t) { + static_assert(!TInputSpecTraits<TInputSpec>::IsPartial, PARTIAL_SPEC_MSG("input")); + static_assert(!TOutputSpecTraits<TOutputSpec>::IsPartial, PARTIAL_SPEC_MSG("output")); + static_assert(TInputSpecTraits<TInputSpec>::SupportPullStreamMode, UNSUPPORTED_MODE_MSG("input", "pull stream")); + static_assert(TOutputSpecTraits<TOutputSpec>::SupportPullStreamMode, UNSUPPORTED_MODE_MSG("output", "pull stream")); + + auto worker = WorkerFactory_->MakeWorker(); + TInputSpecTraits<TInputSpec>::PreparePullStreamWorker(InputSpec_, worker.Get(), std::forward<T>(t)...); + return TOutputSpecTraits<TOutputSpec>::ConvertPullStreamWorkerToOutputType(OutputSpec_, std::move(worker)); + } + }; + + template <typename TInputSpec, typename TOutputSpec> + class TPullListProgram final: public TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory> { + using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::WorkerFactory_; + using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::InputSpec_; + using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::OutputSpec_; + + public: + using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::TProgramCommon; + + public: + template <typename ...T> + typename TOutputSpecTraits<TOutputSpec>::TPullListReturnType Apply(T&& ... 
t) { + static_assert(!TInputSpecTraits<TInputSpec>::IsPartial, PARTIAL_SPEC_MSG("input")); + static_assert(!TOutputSpecTraits<TOutputSpec>::IsPartial, PARTIAL_SPEC_MSG("output")); + static_assert(TInputSpecTraits<TInputSpec>::SupportPullListMode, UNSUPPORTED_MODE_MSG("input", "pull list")); + static_assert(TOutputSpecTraits<TOutputSpec>::SupportPullListMode, UNSUPPORTED_MODE_MSG("output", "pull list")); + + auto worker = WorkerFactory_->MakeWorker(); + TInputSpecTraits<TInputSpec>::PreparePullListWorker(InputSpec_, worker.Get(), std::forward<T>(t)...); + return TOutputSpecTraits<TOutputSpec>::ConvertPullListWorkerToOutputType(OutputSpec_, std::move(worker)); + } + }; + + template <typename TInputSpec, typename TOutputSpec> + class TPushStreamProgram final: public TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory> { + using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::WorkerFactory_; + using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::InputSpec_; + using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::OutputSpec_; + + public: + using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::TProgramCommon; + + public: + template <typename ...T> + typename TInputSpecTraits<TInputSpec>::TConsumerType Apply(T&& ... 
t) { + static_assert(!TInputSpecTraits<TInputSpec>::IsPartial, PARTIAL_SPEC_MSG("input")); + static_assert(!TOutputSpecTraits<TOutputSpec>::IsPartial, PARTIAL_SPEC_MSG("output")); + static_assert(TInputSpecTraits<TInputSpec>::SupportPushStreamMode, UNSUPPORTED_MODE_MSG("input", "push stream")); + static_assert(TOutputSpecTraits<TOutputSpec>::SupportPushStreamMode, UNSUPPORTED_MODE_MSG("output", "push stream")); + + auto worker = WorkerFactory_->MakeWorker(); + TOutputSpecTraits<TOutputSpec>::SetConsumerToWorker(OutputSpec_, worker.Get(), std::forward<T>(t)...); + return TInputSpecTraits<TInputSpec>::MakeConsumer(InputSpec_, std::move(worker)); + } + }; + +#undef NOT_SPEC_MSG +#undef PARTIAL_SPEC_MSG +#undef UNSUPPORTED_MODE_MSG + + //////////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Configure global logging facilities. Affects all YQL modules. + */ + void ConfigureLogging(const TLoggingOptions& = {}); + + /** + * Create a new program factory. + * Custom logging initialization could be preformed by a call to the ConfigureLogging method beforehand. + * If the ConfigureLogging method has not been called the default logging initialization will be performed. 
+ */ + IProgramFactoryPtr MakeProgramFactory(const TProgramFactoryOptions& = {}); + } +} + +Y_DECLARE_OUT_SPEC(inline, NYql::NPureCalc::TCompileError, stream, value) { + stream << value.AsStrBuf() << Endl << "Issues:" << Endl << value.GetIssues() << Endl << Endl << "Yql:" << Endl <<value.GetYql(); +} diff --git a/ydb/library/yql/public/purecalc/common/logger_init.cpp b/ydb/library/yql/public/purecalc/common/logger_init.cpp new file mode 100644 index 0000000000..b172eb1d03 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/logger_init.cpp @@ -0,0 +1,32 @@ +#include "logger_init.h" + +#include <ydb/library/yql/utils/log/log.h> + +#include <atomic> + +namespace NYql { +namespace NPureCalc { + +namespace { + std::atomic_bool Initialized; +} + + void InitLogging(const TLoggingOptions& options) { + NLog::InitLogger(options.LogDestination); + auto& logger = NLog::YqlLogger(); + logger.SetDefaultPriority(options.LogLevel_); + for (int i = 0; i < NLog::EComponentHelpers::ToInt(NLog::EComponent::MaxValue); ++i) { + logger.SetComponentLevel((NLog::EComponent) i, (NLog::ELevel) options.LogLevel_); + } + Initialized = true; + } + + void EnsureLoggingInitialized() { + if (Initialized.load()) { + return; + } + InitLogging(TLoggingOptions()); + } + +} +} diff --git a/ydb/library/yql/public/purecalc/common/logger_init.h b/ydb/library/yql/public/purecalc/common/logger_init.h new file mode 100644 index 0000000000..039cbd4411 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/logger_init.h @@ -0,0 +1,10 @@ +#pragma once + +#include "interface.h" + +namespace NYql { + namespace NPureCalc { + void InitLogging(const TLoggingOptions& options); + void EnsureLoggingInitialized(); + } +} diff --git a/ydb/library/yql/public/purecalc/common/names.cpp b/ydb/library/yql/public/purecalc/common/names.cpp new file mode 100644 index 0000000000..551772842b --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/names.cpp @@ -0,0 +1,16 @@ +#include "names.h" + +#include 
<util/generic/strbuf.h> + +namespace NYql::NPureCalc { + const TStringBuf PurecalcSysColumnsPrefix = "_yql_sys_"; + const TStringBuf PurecalcSysColumnTablePath = "_yql_sys_tablepath"; + + const TStringBuf PurecalcDefaultCluster = "view"; + const TStringBuf PurecalcDefaultService = "data"; + + const TStringBuf PurecalcInputCallableName = "Self"; + const TStringBuf PurecalcInputTablePrefix = "Input"; + + const TStringBuf PurecalcUdfModulePrefix = "<purecalc>::"; +} diff --git a/ydb/library/yql/public/purecalc/common/names.h b/ydb/library/yql/public/purecalc/common/names.h new file mode 100644 index 0000000000..dc08ccd3d0 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/names.h @@ -0,0 +1,16 @@ +#pragma once + +#include <util/generic/fwd.h> + +namespace NYql::NPureCalc { + extern const TStringBuf PurecalcSysColumnsPrefix; + extern const TStringBuf PurecalcSysColumnTablePath; + + extern const TStringBuf PurecalcDefaultCluster; + extern const TStringBuf PurecalcDefaultService; + + extern const TStringBuf PurecalcInputCallableName; + extern const TStringBuf PurecalcInputTablePrefix; + + extern const TStringBuf PurecalcUdfModulePrefix; +} diff --git a/ydb/library/yql/public/purecalc/common/processor_mode.cpp b/ydb/library/yql/public/purecalc/common/processor_mode.cpp new file mode 100644 index 0000000000..957cc2d7f4 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/processor_mode.cpp @@ -0,0 +1 @@ +#include "processor_mode.h" diff --git a/ydb/library/yql/public/purecalc/common/processor_mode.h b/ydb/library/yql/public/purecalc/common/processor_mode.h new file mode 100644 index 0000000000..9bec87cadc --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/processor_mode.h @@ -0,0 +1,11 @@ +#pragma once + +namespace NYql { + namespace NPureCalc { + enum class EProcessorMode { + PullList, + PullStream, + PushStream + }; + } +} diff --git a/ydb/library/yql/public/purecalc/common/program_factory.cpp 
b/ydb/library/yql/public/purecalc/common/program_factory.cpp new file mode 100644 index 0000000000..53b30f884b --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/program_factory.cpp @@ -0,0 +1,144 @@ +#include "program_factory.h" +#include "logger_init.h" +#include "names.h" +#include "worker_factory.h" + +#include <ydb/library/yql/utils/log/log.h> + +using namespace NYql; +using namespace NYql::NPureCalc; + +TProgramFactory::TProgramFactory(const TProgramFactoryOptions& options) + : Options_(options) + , CountersProvider_(nullptr) +{ + EnsureLoggingInitialized(); + + NUserData::TUserData::UserDataToLibraries(Options_.UserData_, Modules_); + + UserData_ = GetYqlModuleResolver(ExprContext_, ModuleResolver_, Options_.UserData_, {}, {}); + + if (!ModuleResolver_) { + ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "failed to compile modules"; + } + + TVector<TString> UDFsPaths; + for (const auto& item: Options_.UserData_) { + if ( + item.Type_ == NUserData::EType::UDF && + item.Disposition_ == NUserData::EDisposition::FILESYSTEM + ) { + UDFsPaths.push_back(item.Content_); + } + } + + if (!Options_.UdfsDir_.empty()) { + NKikimr::NMiniKQL::FindUdfsInDir(Options_.UdfsDir_, &UDFsPaths); + } + + FuncRegistry_ = NKikimr::NMiniKQL::CreateFunctionRegistry( + &NYql::NBacktrace::KikimrBackTrace, NKikimr::NMiniKQL::CreateBuiltinRegistry(), false, UDFsPaths)->Clone(); + + NKikimr::NMiniKQL::FillStaticModules(*FuncRegistry_); +} + +TProgramFactory::~TProgramFactory() { +} + +void TProgramFactory::AddUdfModule( + const TStringBuf& moduleName, + NKikimr::NUdf::TUniquePtr<NKikimr::NUdf::IUdfModule>&& module +) { + FuncRegistry_->AddModule( + TString::Join(PurecalcUdfModulePrefix, moduleName), moduleName, std::move(module) + ); +} + +void TProgramFactory::SetCountersProvider(NKikimr::NUdf::ICountersProvider* provider) { + CountersProvider_ = provider; +} + +IPullStreamWorkerFactoryPtr TProgramFactory::MakePullStreamWorkerFactory( + const 
TInputSpecBase& inputSpec, + const TOutputSpecBase& outputSpec, + TString query, + ETranslationMode mode, + ui16 syntaxVersion +) { + return std::make_shared<TPullStreamWorkerFactory>(TWorkerFactoryOptions( + TIntrusivePtr<TProgramFactory>(this), + inputSpec, + outputSpec, + query, + FuncRegistry_, + ModuleResolver_, + UserData_, + Modules_, + Options_.LLVMSettings, + CountersProvider_, + mode, + syntaxVersion, + Options_.NativeYtTypeFlags, + Options_.DeterministicTimeProviderSeed, + Options_.UseSystemColumns, + Options_.UseWorkerPool + )); +} + +IPullListWorkerFactoryPtr TProgramFactory::MakePullListWorkerFactory( + const TInputSpecBase& inputSpec, + const TOutputSpecBase& outputSpec, + TString query, + ETranslationMode mode, + ui16 syntaxVersion +) { + return std::make_shared<TPullListWorkerFactory>(TWorkerFactoryOptions( + TIntrusivePtr<TProgramFactory>(this), + inputSpec, + outputSpec, + query, + FuncRegistry_, + ModuleResolver_, + UserData_, + Modules_, + Options_.LLVMSettings, + CountersProvider_, + mode, + syntaxVersion, + Options_.NativeYtTypeFlags, + Options_.DeterministicTimeProviderSeed, + Options_.UseSystemColumns, + Options_.UseWorkerPool + )); +} + +IPushStreamWorkerFactoryPtr TProgramFactory::MakePushStreamWorkerFactory( + const TInputSpecBase& inputSpec, + const TOutputSpecBase& outputSpec, + TString query, + ETranslationMode mode, + ui16 syntaxVersion +) { + if (inputSpec.GetSchemas().size() > 1) { + ythrow yexception() << "push stream mode doesn't support several inputs"; + } + + return std::make_shared<TPushStreamWorkerFactory>(TWorkerFactoryOptions( + TIntrusivePtr<TProgramFactory>(this), + inputSpec, + outputSpec, + query, + FuncRegistry_, + ModuleResolver_, + UserData_, + Modules_, + Options_.LLVMSettings, + CountersProvider_, + mode, + syntaxVersion, + Options_.NativeYtTypeFlags, + Options_.DeterministicTimeProviderSeed, + Options_.UseSystemColumns, + Options_.UseWorkerPool + )); +} diff --git 
a/ydb/library/yql/public/purecalc/common/program_factory.h b/ydb/library/yql/public/purecalc/common/program_factory.h new file mode 100644 index 0000000000..d1402c21fd --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/program_factory.h @@ -0,0 +1,46 @@ +#pragma once + +#include "interface.h" + +#include <ydb/library/yql/utils/backtrace/backtrace.h> +#include <ydb/library/yql/core/services/mounts/yql_mounts.h> + +#include <ydb/library/yql/ast/yql_expr.h> +#include <ydb/library/yql/core/yql_user_data.h> +#include <ydb/library/yql/minikql/mkql_function_registry.h> +#include <ydb/library/yql/minikql/invoke_builtins/mkql_builtins.h> + +#include <util/generic/function.h> +#include <util/generic/ptr.h> +#include <util/generic/strbuf.h> + +namespace NYql { + namespace NPureCalc { + class TProgramFactory: public IProgramFactory { + private: + TProgramFactoryOptions Options_; + TExprContext ExprContext_; + TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry_; + IModuleResolver::TPtr ModuleResolver_; + TUserDataTable UserData_; + THashMap<TString, TString> Modules_; + NKikimr::NUdf::ICountersProvider* CountersProvider_; + + public: + explicit TProgramFactory(const TProgramFactoryOptions&); + ~TProgramFactory() override; + + public: + void AddUdfModule( + const TStringBuf& moduleName, + NKikimr::NUdf::TUniquePtr<NKikimr::NUdf::IUdfModule>&& module + ) override; + + void SetCountersProvider(NKikimr::NUdf::ICountersProvider* provider) override; + + IPullStreamWorkerFactoryPtr MakePullStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) override; + IPullListWorkerFactoryPtr MakePullListWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) override; + IPushStreamWorkerFactoryPtr MakePushStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) override; + }; + } +} diff --git 
a/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp b/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp new file mode 100644 index 0000000000..b339488cbd --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.cpp @@ -0,0 +1,93 @@ +#include "align_output_schema.h" + +#include <ydb/library/yql/public/purecalc/common/type_from_schema.h> + +#include <ydb/library/yql/core/yql_expr_type_annotation.h> + +using namespace NYql; +using namespace NYql::NPureCalc; + +namespace { + class TOutputAligner : public TSyncTransformerBase { + private: + const TTypeAnnotationNode* OutputStruct_; + EProcessorMode ProcessorMode_; + + public: + explicit TOutputAligner(const TTypeAnnotationNode* outputStruct, EProcessorMode processorMode) + : OutputStruct_(outputStruct) + , ProcessorMode_(processorMode) + { + } + + public: + TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { + output = input; + + const auto* expectedType = MakeExpectedType(ctx); + const auto* expectedItemType = MakeExpectedItemType(); + const auto* actualType = MakeActualType(input); + const auto* actualItemType = MakeActualItemType(input); + + if (!ValidateOutputType(actualItemType, expectedItemType, ctx)) { + return TStatus::Error; + } + + if (!expectedType) { + return TStatus::Ok; + } + + auto status = TryConvertTo(output, *actualType, *expectedType, ctx); + + if (status.Level == IGraphTransformer::TStatus::Repeat) { + status = IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true); + } + + return status; + } + + void Rewind() final { + } + + private: + const TTypeAnnotationNode* MakeExpectedType(TExprContext& ctx) { + if (!OutputStruct_) { + return nullptr; + } + + switch (ProcessorMode_) { + case EProcessorMode::PullList: + return ctx.MakeType<TListExprType>(OutputStruct_); + case EProcessorMode::PullStream: + case EProcessorMode::PushStream: + return 
ctx.MakeType<TStreamExprType>(OutputStruct_); + } + + Y_FAIL("Unexpected"); + } + + const TTypeAnnotationNode* MakeExpectedItemType() { + return OutputStruct_; + } + + const TTypeAnnotationNode* MakeActualType(TExprNode::TPtr& input) { + return input->GetTypeAnn(); + } + + const TTypeAnnotationNode* MakeActualItemType(TExprNode::TPtr& input) { + auto actualType = MakeActualType(input); + switch (actualType->GetKind()) { + case ETypeAnnotationKind::Stream: + return actualType->Cast<TStreamExprType>()->GetItemType(); + case ETypeAnnotationKind::List: + return actualType->Cast<TListExprType>()->GetItemType(); + default: + Y_FAIL("unexpected return type"); + } + } + }; +} + +TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeOutputAligner(const TTypeAnnotationNode* outputStruct, EProcessorMode processorMode) { + return new TOutputAligner(outputStruct, processorMode); +} diff --git a/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.h b/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.h new file mode 100644 index 0000000000..667a50ebf1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/transformations/align_output_schema.h @@ -0,0 +1,21 @@ +#pragma once + +#include <ydb/library/yql/public/purecalc/common/processor_mode.h> + +#include <ydb/library/yql/core/yql_graph_transformer.h> +#include <ydb/library/yql/core/yql_type_annotation.h> + +namespace NYql { + namespace NPureCalc { + /** + * A transformer which converts an output type of the expression to the given type or reports an error. + * + * @param outputStruct destination output struct type. + * @return a graph transformer for type alignment. 
+ */ + TAutoPtr<IGraphTransformer> MakeOutputAligner( + const TTypeAnnotationNode* outputStruct, + EProcessorMode processorMode + ); + } +} diff --git a/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp b/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp new file mode 100644 index 0000000000..20cffb0112 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.cpp @@ -0,0 +1,96 @@ +#include "extract_used_columns.h" + +#include <ydb/library/yql/public/purecalc/common/inspect_input.h> + +#include <ydb/library/yql/core/yql_expr_optimize.h> +#include <ydb/library/yql/core/expr_nodes/yql_expr_nodes.h> + +using namespace NYql; +using namespace NYql::NPureCalc; + +namespace { + class TUsedColumnsExtractor : public TSyncTransformerBase { + private: + TVector<THashSet<TString>>* const Destination_; + const TVector<THashSet<TString>>& AllColumns_; + TString NodeName_; + + bool CalculatedUsedFields_ = false; + + public: + TUsedColumnsExtractor( + TVector<THashSet<TString>>* destination, + const TVector<THashSet<TString>>& allColumns, + TString nodeName + ) + : Destination_(destination) + , AllColumns_(allColumns) + , NodeName_(std::move(nodeName)) + { + } + + TUsedColumnsExtractor(TVector<THashSet<TString>>*, TVector<THashSet<TString>>&&, TString) = delete; + + public: + TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { + output = input; + + if (CalculatedUsedFields_) { + return IGraphTransformer::TStatus::Ok; + } + + bool hasError = false; + + *Destination_ = AllColumns_; + + VisitExpr(input, [&](const TExprNode::TPtr& inputExpr) { + NNodes::TExprBase node(inputExpr); + if (auto maybeExtract = node.Maybe<NNodes::TCoExtractMembers>()) { + auto extract = maybeExtract.Cast(); + const auto& arg = extract.Input().Ref(); + if (arg.IsCallable(NodeName_)) { + ui32 inputIndex; + if (!TryFetchInputIndexFromSelf(arg, ctx, AllColumns_.size(), 
inputIndex)) { + hasError = true; + return false; + } + + YQL_ENSURE(inputIndex < AllColumns_.size()); + + auto& destinationColumnsSet = (*Destination_)[inputIndex]; + const auto& allColumnsSet = AllColumns_[inputIndex]; + + destinationColumnsSet.clear(); + for (const auto& columnAtom : extract.Members()) { + TString name = TString(columnAtom.Value()); + YQL_ENSURE(allColumnsSet.contains(name), "unexpected column in the input struct"); + destinationColumnsSet.insert(name); + } + } + } + + return true; + }); + + if (hasError) { + return IGraphTransformer::TStatus::Error; + } + + CalculatedUsedFields_ = true; + + return IGraphTransformer::TStatus::Ok; + } + + void Rewind() final { + CalculatedUsedFields_ = false; + } + }; +} + +TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeUsedColumnsExtractor( + TVector<THashSet<TString>>* destination, + const TVector<THashSet<TString>>& allColumns, + const TString& nodeName +) { + return new TUsedColumnsExtractor(destination, allColumns, nodeName); +} diff --git a/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.h b/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.h new file mode 100644 index 0000000000..0199be46f7 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.h @@ -0,0 +1,29 @@ +#pragma once + +#include <ydb/library/yql/public/purecalc/common/names.h> + +#include <ydb/library/yql/core/yql_graph_transformer.h> +#include <ydb/library/yql/core/yql_type_annotation.h> + +#include <util/generic/hash_set.h> +#include <util/generic/string.h> + +namespace NYql { + namespace NPureCalc { + /** + * Make transformation which builds sets of input columns from the given expression. + * + * @param destination a vector of string sets which will be populated with column names sets when + * transformation pipeline is launched. This pointer should contain a valid + * TVector<THashSet> instance. The transformation will overwrite its contents. 
+ * @param allColumns vector of sets with all available columns for each input. + * @param nodeName name of the callable used to get input data, e.g. `Self`. + * @return an extractor which scans an input structs contents and populates destination. + */ + TAutoPtr<IGraphTransformer> MakeUsedColumnsExtractor( + TVector<THashSet<TString>>* destination, + const TVector<THashSet<TString>>& allColumns, + const TString& nodeName = TString{PurecalcInputCallableName} + ); + } +} diff --git a/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp b/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp new file mode 100644 index 0000000000..c6eaf01139 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.cpp @@ -0,0 +1,100 @@ +#include "output_columns_filter.h" + +#include <ydb/library/yql/core/yql_expr_type_annotation.h> + +using namespace NYql; +using namespace NYql::NPureCalc; + +namespace { + class TOutputColumnsFilter: public TSyncTransformerBase { + private: + TMaybe<THashSet<TString>> Filter_; + bool Fired_; + + public: + explicit TOutputColumnsFilter(TMaybe<THashSet<TString>> filter) + : Filter_(std::move(filter)) + , Fired_(false) + { + } + + public: + void Rewind() override { + Fired_ = false; + } + + TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { + output = input; + + if (Fired_ || Filter_.Empty()) { + return IGraphTransformer::TStatus::Ok; + } + + const TTypeAnnotationNode* returnType = output->GetTypeAnn(); + const TTypeAnnotationNode* returnItemType = nullptr; + switch (returnType->GetKind()) { + case ETypeAnnotationKind::Stream: + returnItemType = returnType->Cast<TStreamExprType>()->GetItemType(); + break; + case ETypeAnnotationKind::List: + returnItemType = returnType->Cast<TListExprType>()->GetItemType(); + break; + default: + Y_FAIL("unexpected return type"); + } + + if (returnItemType->GetKind() != 
ETypeAnnotationKind::Struct) { + ctx.AddError(TIssue(ctx.GetPosition(output->Pos()), "columns filter only supported for single-output programs")); + } + + const auto* returnItemStruct = returnItemType->Cast<TStructExprType>(); + + auto arg = ctx.NewArgument(TPositionHandle(), "row"); + TExprNode::TListType asStructItems; + for (const auto& x : returnItemStruct->GetItems()) { + TExprNode::TPtr value; + if (Filter_->contains(x->GetName())) { + value = ctx.Builder({}) + .Callable("Member") + .Add(0, arg) + .Atom(1, x->GetName()) + .Seal() + .Build(); + } else { + auto type = x->GetItemType(); + value = ctx.Builder({}) + .Callable(type->GetKind() == ETypeAnnotationKind::Optional ? "Nothing" : "Default") + .Add(0, ExpandType({}, *type, ctx)) + .Seal() + .Build(); + } + + auto item = ctx.Builder({}) + .List() + .Atom(0, x->GetName()) + .Add(1, value) + .Seal() + .Build(); + + asStructItems.push_back(item); + } + + auto body = ctx.NewCallable(TPositionHandle(), "AsStruct", std::move(asStructItems)); + auto lambda = ctx.NewLambda(TPositionHandle(), ctx.NewArguments(TPositionHandle(), {arg}), std::move(body)); + output = ctx.Builder(TPositionHandle()) + .Callable("Map") + .Add(0, output) + .Add(1, lambda) + .Seal() + .Build(); + + Fired_ = true; + + return IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true); + } + }; +} + +TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeOutputColumnsFilter(const TMaybe<THashSet<TString>>& columns) { + return new TOutputColumnsFilter(columns); +} diff --git a/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.h b/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.h new file mode 100644 index 0000000000..1e86ae5276 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.h @@ -0,0 +1,18 @@ +#pragma once + +#include <ydb/library/yql/public/purecalc/common/processor_mode.h> + +#include <ydb/library/yql/core/yql_graph_transformer.h> 
+#include <ydb/library/yql/core/yql_type_annotation.h> + +namespace NYql { + namespace NPureCalc { + /** + * A transformer which removes unwanted columns from output. + * + * @param columns remove all columns that are not in this set. + * @return a graph transformer for filtering output. + */ + TAutoPtr<IGraphTransformer> MakeOutputColumnsFilter(const TMaybe<THashSet<TString>>& columns); + } +} diff --git a/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp b/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp new file mode 100644 index 0000000000..20b7eaa174 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.cpp @@ -0,0 +1,216 @@ +#include "replace_table_reads.h" + +#include <ydb/library/yql/public/purecalc/common/names.h> + +#include <ydb/library/yql/core/yql_expr_optimize.h> +#include <ydb/library/yql/core/yql_expr_type_annotation.h> + +using namespace NYql; +using namespace NYql::NPureCalc; + +namespace { + class TTableReadsReplacer: public TSyncTransformerBase { + private: + ui32 InputsNumber_; + bool UseSystemColumns_; + TString TablePrefix_; + TString CallableName_; + bool Complete_ = false; + + public: + explicit TTableReadsReplacer( + ui32 inputsNumber, + bool useSystemColumns, + TString tablePrefix, + TString inputNodeName + ) + : InputsNumber_(inputsNumber) + , UseSystemColumns_(useSystemColumns) + , TablePrefix_(std::move(tablePrefix)) + , CallableName_(std::move(inputNodeName)) + { + } + + TTableReadsReplacer(TVector<const TStructExprType*>&&, TString, TString) = delete; + + public: + TStatus DoTransform(const TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { + output = input; + if (Complete_) { + return TStatus::Ok; + } + + TOptimizeExprSettings settings(nullptr); + + auto status = OptimizeExpr(input, output, [&](const TExprNode::TPtr& node, TExprContext& ctx) -> TExprNode::TPtr { + if 
(node->IsCallable(NNodes::TCoRight::CallableName())) { + TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { + return new TIssue(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content()); + }); + + if (!EnsureMinArgsCount(*node, 1, ctx)) { + return nullptr; + } + + if (!node->Child(0)->IsCallable(NNodes::TCoRead::CallableName())) { + ctx.AddError(TIssue(ctx.GetPosition(node->Child(0)->Pos()), TStringBuilder() << "Expected Read!")); + return nullptr; + } + + return BuildInputFromRead(node->Pos(), node->ChildPtr(0), ctx); + } + + return node; + }, ctx, settings); + + if (status.Level == TStatus::Ok) { + Complete_ = true; + } + return status; + } + + void Rewind() override { + Complete_ = false; + } + + private: + TExprNode::TPtr BuildInputFromRead(TPositionHandle replacePos, const TExprNode::TPtr& node, TExprContext& ctx) { + TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { + return MakeIntrusive<TIssue>(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content()); + }); + + if (!EnsureMinArgsCount(*node, 3, ctx)) { + return nullptr; + } + + const auto source = node->ChildPtr(2); + if (source->IsCallable(NNodes::TCoKey::CallableName())) { + return BuildInputFromKey(replacePos, source, ctx); + } + if (source->IsCallable("DataTables")) { + return BuildInputFromDataTables(replacePos, source, ctx); + } + + ctx.AddError(TIssue(ctx.GetPosition(source->Pos()), TStringBuilder() << "Unsupported read source: " << source->Content())); + + return nullptr; + } + + TExprNode::TPtr BuildInputFromKey(TPositionHandle replacePos, const TExprNode::TPtr& node, TExprContext& ctx) { + TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { + return MakeIntrusive<TIssue>(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content()); + }); + + ui32 inputIndex; + TExprNode::TPtr inputTableName; + + if (!TryFetchInputIndexFromKey(node, ctx, inputIndex, inputTableName)) { + return nullptr; + } + + 
YQL_ENSURE(inputTableName->IsCallable(NNodes::TCoString::CallableName())); + + auto inputNode = ctx.Builder(replacePos) + .Callable(CallableName_) + .Atom(0, ToString(inputIndex)) + .Seal() + .Build(); + + if (UseSystemColumns_) { + auto mapLambda = ctx.Builder(replacePos) + .Lambda() + .Param("row") + .Callable(0, NNodes::TCoAddMember::CallableName()) + .Arg(0, "row") + .Atom(1, PurecalcSysColumnTablePath) + .Add(2, inputTableName) + .Seal() + .Seal() + .Build(); + + return ctx.Builder(replacePos) + .Callable(NNodes::TCoMap::CallableName()) + .Add(0, std::move(inputNode)) + .Add(1, std::move(mapLambda)) + .Seal() + .Build(); + } + + return inputNode; + } + + TExprNode::TPtr BuildInputFromDataTables(TPositionHandle replacePos, const TExprNode::TPtr& node, TExprContext& ctx) { + TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { + return MakeIntrusive<TIssue>(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content()); + }); + + if (!InputsNumber_) { + ctx.AddError(TIssue(ctx.GetPosition(node->Pos()), "No inputs provided by input spec")); + return nullptr; + } + + if (!EnsureArgsCount(*node, 0, ctx)) { + return nullptr; + } + + auto builder = ctx.Builder(replacePos); + + if (InputsNumber_ > 1) { + auto listBuilder = builder.List(); + + for (ui32 i = 0; i < InputsNumber_; ++i) { + listBuilder.Callable(i, CallableName_).Atom(0, ToString(i)).Seal(); + } + + return listBuilder.Seal().Build(); + } + + return builder.Callable(CallableName_).Atom(0, "0").Seal().Build(); + } + + bool TryFetchInputIndexFromKey(const TExprNode::TPtr& node, TExprContext& ctx, ui32& resultIndex, TExprNode::TPtr& resultTableName) { + if (!EnsureArgsCount(*node, 1, ctx)) { + return false; + } + + const auto* keyArg = node->Child(0); + if (!keyArg->IsList() || keyArg->ChildrenSize() != 2 || !keyArg->Child(0)->IsAtom("table") || + !keyArg->Child(1)->IsCallable(NNodes::TCoString::CallableName())) + { + ctx.AddError(TIssue(ctx.GetPosition(keyArg->Pos()), "Expected single 
table name")); + return false; + } + + resultTableName = keyArg->ChildPtr(1); + + auto tableName = resultTableName->Child(0)->Content(); + + if (!tableName.StartsWith(TablePrefix_)) { + ctx.AddError(TIssue(ctx.GetPosition(resultTableName->Child(0)->Pos()), + TStringBuilder() << "Invalid table name " << TString{tableName}.Quote() << ": prefix must be " << TablePrefix_.Quote())); + return false; + } + + tableName.SkipPrefix(TablePrefix_); + + if (!tableName) { + resultIndex = 0; + } else if (!TryFromString(tableName, resultIndex)) { + ctx.AddError(TIssue(ctx.GetPosition(resultTableName->Child(0)->Pos()), + TStringBuilder() << "Invalid table name " << TString{tableName}.Quote() << ": suffix must be UI32 number")); + return false; + } + + return true; + } + }; +} + +TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeTableReadsReplacer( + ui32 inputsNumber, + bool useSystemColumns, + TString tablePrefix, + TString callableName +) { + return new TTableReadsReplacer(inputsNumber, useSystemColumns, std::move(tablePrefix), std::move(callableName)); +} diff --git a/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.h b/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.h new file mode 100644 index 0000000000..9c0196800d --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.h @@ -0,0 +1,28 @@ +#pragma once + +#include <ydb/library/yql/public/purecalc/common/names.h> + +#include <ydb/library/yql/core/yql_graph_transformer.h> + +namespace NYql::NPureCalc { + /** + * SQL translation would generate a standard Read! call to read each input table. It will then generate + * a Right! call to get the table data from a tuple returned by Read!. This transformation replaces any Right! + * call with a call to special function used to get input data. + * + * Each table name must start with the specified prefix and end with an index of program input (e.g. `Input0`). 
+ * Name without numeric suffix is an alias for the first input. + * + * @param inputsNumber number of program inputs. + * @param useSystemColumns whether to allow special system columns in input structs. + * @param tablePrefix required prefix for all table names (e.g. `Input`). + * @param callableName name of the special callable used to get input data (e.g. `Self`). + * @return a graph transformer for replacing table reads. + */ + TAutoPtr<IGraphTransformer> MakeTableReadsReplacer( + ui32 inputsNumber, + bool useSystemColumns, + TString tablePrefix = TString{PurecalcInputTablePrefix}, + TString callableName = TString{PurecalcInputCallableName} + ); +} diff --git a/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp b/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp new file mode 100644 index 0000000000..9ff39d19e9 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/transformations/type_annotation.cpp @@ -0,0 +1,228 @@ +#include "type_annotation.h" + +#include <ydb/library/yql/public/purecalc/common/interface.h> +#include <ydb/library/yql/public/purecalc/common/inspect_input.h> +#include <ydb/library/yql/public/purecalc/common/names.h> + +#include <ydb/library/yql/core/type_ann/type_ann_core.h> +#include <ydb/library/yql/core/yql_expr_type_annotation.h> + +#include <util/generic/fwd.h> + +using namespace NYql; +using namespace NYql::NPureCalc; + +namespace { + class TTypeAnnotatorBase: public TSyncTransformerBase { + public: + using THandler = std::function<TStatus(const TExprNode::TPtr&, TExprNode::TPtr&, TExprContext&)>; + + TTypeAnnotatorBase(TTypeAnnotationContextPtr typeAnnotationContext) + { + OriginalTransformer_.reset(CreateExtCallableTypeAnnotationTransformer(*typeAnnotationContext).Release()); + } + + TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { + if (input->Type() == TExprNode::Callable) { + if (auto handler = Handlers_.FindPtr(input->Content())) { 
+ return (*handler)(input, output, ctx); + } + } + + auto status = OriginalTransformer_->Transform(input, output, ctx); + + YQL_ENSURE(status.Level != IGraphTransformer::TStatus::Async, "Async type check is not supported"); + + return status; + } + + void Rewind() final { + OriginalTransformer_->Rewind(); + } + + protected: + void AddHandler(std::initializer_list<TStringBuf> names, THandler handler) { + for (auto name: names) { + YQL_ENSURE(Handlers_.emplace(name, handler).second, "Duplicate handler for " << name); + } + } + + template <class TDerived> + THandler Hndl(TStatus(TDerived::* handler)(const TExprNode::TPtr&, TExprNode::TPtr&, TExprContext&)) { + return [this, handler] (TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) { + return (static_cast<TDerived*>(this)->*handler)(input, output, ctx); + }; + } + + template <class TDerived> + THandler Hndl(TStatus(TDerived::* handler)(const TExprNode::TPtr&, TExprContext&)) { + return [this, handler] (TExprNode::TPtr input, TExprNode::TPtr& /*output*/, TExprContext& ctx) { + return (static_cast<TDerived*>(this)->*handler)(input, ctx); + }; + } + + private: + std::shared_ptr<IGraphTransformer> OriginalTransformer_; + THashMap<TStringBuf, THandler> Handlers_; + }; + + class TTypeAnnotator : public TTypeAnnotatorBase { + private: + TTypeAnnotationContextPtr TypeAnnotationContext_; + const TVector<const TStructExprType*>& InputStructs_; + EProcessorMode ProcessorMode_; + TString InputNodeName_; + + public: + TTypeAnnotator( + TTypeAnnotationContextPtr typeAnnotationContext, + const TVector<const TStructExprType*>& inputStructs, + EProcessorMode processorMode, + TString nodeName + ) + : TTypeAnnotatorBase(typeAnnotationContext) + , InputStructs_(inputStructs) + , ProcessorMode_(processorMode) + , InputNodeName_(std::move(nodeName)) + { + AddHandler({InputNodeName_}, Hndl(&TTypeAnnotator::HandleInputNode)); + AddHandler({NNodes::TCoTableName::CallableName()}, Hndl(&TTypeAnnotator::HandleTableName)); + 
AddHandler({NNodes::TCoTablePath::CallableName()}, Hndl(&TTypeAnnotator::HandleTablePath)); + AddHandler({NNodes::TCoHoppingTraits::CallableName()}, Hndl(&TTypeAnnotator::HandleHoppingTraits)); + } + + TTypeAnnotator(TTypeAnnotationContextPtr, TVector<const TStructExprType*>&&, EProcessorMode, TString) = delete; + + private: + TStatus HandleInputNode(const TExprNode::TPtr& input, TExprContext& ctx) { + ui32 inputIndex; + if (!TryFetchInputIndexFromSelf(*input, ctx, InputStructs_.size(), inputIndex)) { + return IGraphTransformer::TStatus::Error; + } + + YQL_ENSURE(inputIndex < InputStructs_.size()); + + if (ProcessorMode_ != EProcessorMode::PullList) { + input->SetTypeAnn(ctx.MakeType<TStreamExprType>(InputStructs_[inputIndex])); + } else { + input->SetTypeAnn(ctx.MakeType<TListExprType>(InputStructs_[inputIndex])); + } + + return TStatus::Ok; + } + + TStatus HandleTableName(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { + if (!EnsureMinMaxArgsCount(*input, 1, 2, ctx)) { + return TStatus::Error; + } + + if (input->ChildrenSize() > 1) { + if (!EnsureAtom(input->Tail(), ctx)) { + return TStatus::Error; + } + + if (input->Tail().Content() != PurecalcDefaultService) { + ctx.AddError( + TIssue( + ctx.GetPosition(input->Tail().Pos()), + TStringBuilder() << "Unsupported system: " << input->Tail().Content())); + return TStatus::Error; + } + } + + if (input->Head().IsCallable(NNodes::TCoDependsOn::CallableName())) { + if (!EnsureArgsCount(input->Head(), 1, ctx)) { + return TStatus::Error; + } + + if (!TryBuildTableNameNode(input->Pos(), input->Head().HeadPtr(), output, ctx)) { + return TStatus::Error; + } + } else { + if (!EnsureSpecificDataType(input->Head(), EDataSlot::String, ctx)) { + return TStatus::Error; + } + output = input->HeadPtr(); + } + + return TStatus::Repeat; + } + + TStatus HandleTablePath(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { + if (!EnsureArgsCount(*input, 1, ctx)) { + return TStatus::Error; 
+ } + + if (!EnsureDependsOn(input->Head(), ctx)) { + return TStatus::Error; + } + + if (!EnsureArgsCount(input->Head(), 1, ctx)) { + return TStatus::Error; + } + + if (!TryBuildTableNameNode(input->Pos(), input->Head().HeadPtr(), output, ctx)) { + return TStatus::Error; + } + + return TStatus::Repeat; + } + + TStatus HandleHoppingTraits(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { + Y_UNUSED(output); + if (input->ChildrenSize() == 1) { + auto children = input->ChildrenList(); + auto falseArg = ctx.Builder(input->Pos()) + .Atom("false") + .Seal() + .Build(); + children.emplace_back(falseArg); + input->ChangeChildrenInplace(std::move(children)); + return TStatus::Repeat; + } + + return TStatus::Ok; + } + + private: + bool TryBuildTableNameNode( + TPositionHandle position, const TExprNode::TPtr& row, TExprNode::TPtr& result, TExprContext& ctx) + { + if (!EnsureStructType(*row, ctx)) { + return false; + } + + const auto* structType = row->GetTypeAnn()->Cast<TStructExprType>(); + + if (auto pos = structType->FindItem(PurecalcSysColumnTablePath)) { + if (!EnsureSpecificDataType(row->Pos(), *structType->GetItems()[*pos]->GetItemType(), EDataSlot::String, ctx)) { + return false; + } + + result = ctx.Builder(position) + .Callable(NNodes::TCoMember::CallableName()) + .Add(0, row) + .Atom(1, PurecalcSysColumnTablePath) + .Seal() + .Build(); + } else { + result = ctx.Builder(position) + .Callable(NNodes::TCoString::CallableName()) + .Atom(0, "") + .Seal() + .Build(); + } + + return true; + } + }; +} + +TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeTypeAnnotationTransformer( + TTypeAnnotationContextPtr typeAnnotationContext, + const TVector<const TStructExprType*>& inputStructs, + EProcessorMode processorMode, + const TString& nodeName +) { + return new TTypeAnnotator(typeAnnotationContext, inputStructs, processorMode, nodeName); +} diff --git a/ydb/library/yql/public/purecalc/common/transformations/type_annotation.h 
b/ydb/library/yql/public/purecalc/common/transformations/type_annotation.h new file mode 100644 index 0000000000..05a3674ff8 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/transformations/type_annotation.h @@ -0,0 +1,28 @@ +#pragma once + +#include <ydb/library/yql/public/purecalc/common/names.h> +#include <ydb/library/yql/public/purecalc/common/processor_mode.h> + +#include <ydb/library/yql/core/yql_graph_transformer.h> +#include <ydb/library/yql/core/yql_type_annotation.h> + +namespace NYql { + namespace NPureCalc { + /** + * Build type annotation transformer that is aware of type of the input rows. + * + * @param typeAnnotationContext current context. + * @param inputStructs types of each input. + * @param processorMode current processor mode. This will affect generated input type, + * e.g. list node or stream node. + * @param nodeName name of the callable used to get input data, e.g. `Self`. + * @return a graph transformer for type annotation. + */ + TAutoPtr<IGraphTransformer> MakeTypeAnnotationTransformer( + TTypeAnnotationContextPtr typeAnnotationContext, + const TVector<const TStructExprType*>& inputStructs, + EProcessorMode processorMode, + const TString& nodeName = TString{PurecalcInputCallableName} + ); + } +} diff --git a/ydb/library/yql/public/purecalc/common/type_from_schema.cpp b/ydb/library/yql/public/purecalc/common/type_from_schema.cpp new file mode 100644 index 0000000000..7579481335 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/type_from_schema.cpp @@ -0,0 +1,255 @@ +#include "type_from_schema.h" + +#include <library/cpp/yson/node/node_io.h> + +#include <ydb/library/yql/core/yql_expr_type_annotation.h> +#include <ydb/library/yql/providers/common/schema/expr/yql_expr_schema.h> + +namespace { + using namespace NYql; + +#define REPORT(...) 
ctx.AddError(TIssue(TString(TStringBuilder() << __VA_ARGS__))) + + bool CheckStruct(const TStructExprType* got, const TStructExprType* expected, TExprContext& ctx) { + auto status = true; + + if (expected) { + for (const auto* gotNamedItem : got->GetItems()) { + auto expectedIndex = expected->FindItem(gotNamedItem->GetName()); + if (expectedIndex) { + const auto* gotItem = gotNamedItem->GetItemType(); + const auto* expectedItem = expected->GetItems()[*expectedIndex]->GetItemType(); + + auto arg = ctx.NewArgument(TPositionHandle(), "arg"); + auto fieldConversionStatus = TrySilentConvertTo(arg, *gotItem, *expectedItem, ctx); + if (fieldConversionStatus.Level == IGraphTransformer::TStatus::Error) { + REPORT("Item " << TString{gotNamedItem->GetName()}.Quote() << " expected to be " << + *expectedItem << ", but got " << *gotItem); + status = false; + } + } else { + REPORT("Got unexpected item " << TString{gotNamedItem->GetName()}.Quote()); + status = false; + } + } + + for (const auto* expectedNamedItem : expected->GetItems()) { + if (expectedNamedItem->GetItemType()->GetKind() == ETypeAnnotationKind::Optional) { + continue; + } + if (!got->FindItem(expectedNamedItem->GetName())) { + REPORT("Expected item " << TString{expectedNamedItem->GetName()}.Quote()); + status = false; + } + } + } + + return status; + } + + bool CheckVariantContent(const TStructExprType* got, const TStructExprType* expected, TExprContext& ctx) { + auto status = true; + + if (expected) { + for (const auto* gotNamedItem : got->GetItems()) { + if (!expected->FindItem(gotNamedItem->GetName())) { + REPORT("Got unexpected alternative " << TString{gotNamedItem->GetName()}.Quote()); + status = false; + } + } + + for (const auto* expectedNamedItem : expected->GetItems()) { + if (!got->FindItem(expectedNamedItem->GetName())) { + REPORT("Expected alternative " << TString{expectedNamedItem->GetName()}.Quote()); + status = false; + } + } + } + + for (const auto* gotNamedItem : got->GetItems()) { + const auto* 
gotItem = gotNamedItem->GetItemType(); + auto expectedIndex = expected ? expected->FindItem(gotNamedItem->GetName()) : Nothing(); + const auto* expectedItem = expected && expectedIndex ? expected->GetItems()[*expectedIndex]->GetItemType() : nullptr; + + TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { + return new TIssue(TPosition(), TStringBuilder() << "Alternative " << TString{gotNamedItem->GetName()}.Quote()); + }); + + if (expectedItem && expectedItem->GetKind() != gotItem->GetKind()) { + REPORT("Expected to be " << expectedItem->GetKind() << ", but got " << gotItem->GetKind()); + status = false; + } + + if (gotItem->GetKind() != ETypeAnnotationKind::Struct) { + REPORT("Expected to be Struct, but got " << gotItem->GetKind()); + status = false; + } + + const auto* gotStruct = gotItem->Cast<TStructExprType>(); + const auto* expectedStruct = expectedItem ? expectedItem->Cast<TStructExprType>() : nullptr; + + if (!CheckStruct(gotStruct, expectedStruct, ctx)) { + status = false; + } + } + + return status; + } + + bool CheckVariantContent(const TTupleExprType* got, const TTupleExprType* expected, TExprContext& ctx) { + if (expected && expected->GetSize() != got->GetSize()) { + REPORT("Expected to have " << expected->GetSize() << " alternatives, but got " << got->GetSize()); + return false; + } + + auto status = true; + + for (size_t i = 0; i < got->GetSize(); i++) { + const auto* gotItem = got->GetItems()[i]; + const auto* expectedItem = expected ? 
expected->GetItems()[i] : nullptr; + + TIssueScopeGuard issueScope(ctx.IssueManager, [i]() { + return new TIssue(TPosition(), TStringBuilder() << "Alternative #" << i); + }); + + if (expectedItem && expectedItem->GetKind() != gotItem->GetKind()) { + REPORT("Expected " << expectedItem->GetKind() << ", but got " << gotItem->GetKind()); + status = false; + } + + if (gotItem->GetKind() != ETypeAnnotationKind::Struct) { + REPORT("Expected Struct, but got " << gotItem->GetKind()); + status = false; + } + + const auto* gotStruct = gotItem->Cast<TStructExprType>(); + const auto* expectedStruct = expectedItem ? expectedItem->Cast<TStructExprType>() : nullptr; + + if (!CheckStruct(gotStruct, expectedStruct, ctx)) { + status = false; + } + } + + return status; + } + + bool CheckVariant(const TVariantExprType* got, const TVariantExprType* expected, TExprContext& ctx) { + if (expected && expected->GetUnderlyingType()->GetKind() != got->GetUnderlyingType()->GetKind()) { + REPORT("Expected Variant over " << expected->GetUnderlyingType()->GetKind() << + ", but got Variant over " << got->GetUnderlyingType()->GetKind()); + return false; + } + + switch (got->GetUnderlyingType()->GetKind()) { + case ETypeAnnotationKind::Struct: + { + const auto* gotStruct = got->GetUnderlyingType()->Cast<TStructExprType>(); + const auto* expectedStruct = expected ? expected->GetUnderlyingType()->Cast<TStructExprType>() : nullptr; + return CheckVariantContent(gotStruct, expectedStruct, ctx); + } + case ETypeAnnotationKind::Tuple: + { + const auto* gotTuple = got->GetUnderlyingType()->Cast<TTupleExprType>(); + const auto* expectedTuple = expected ? 
expected->GetUnderlyingType()->Cast<TTupleExprType>() : nullptr; + return CheckVariantContent(gotTuple, expectedTuple, ctx); + } + default: + Y_UNREACHABLE(); + } + + return false; + } + + bool CheckSchema(const TTypeAnnotationNode* got, const TTypeAnnotationNode* expected, TExprContext& ctx, bool allowVariant) { + if (expected && expected->GetKind() != got->GetKind()) { + REPORT("Expected " << expected->GetKind() << ", but got " << got->GetKind()); + return false; + } + + switch (got->GetKind()) { + case ETypeAnnotationKind::Struct: + { + TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Toplevel struct"); }); + + const auto* gotStruct = got->Cast<TStructExprType>(); + const auto* expectedStruct = expected ? expected->Cast<TStructExprType>() : nullptr; + + if (!gotStruct->Validate(TPositionHandle(), ctx)) { + return false; + } + + return CheckStruct(gotStruct, expectedStruct, ctx); + } + case ETypeAnnotationKind::Variant: + if (allowVariant) { + TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Toplevel variant"); }); + + const auto* gotVariant = got->Cast<TVariantExprType>(); + const auto* expectedVariant = expected ? 
expected->Cast<TVariantExprType>() : nullptr; + + if (!gotVariant->Validate(TPositionHandle(), ctx)) { + return false; + } + + return CheckVariant(gotVariant, expectedVariant, ctx); + } + [[fallthrough]]; + default: + if (allowVariant) { + REPORT("Expected Struct or Variant, but got " << got->GetKind()); + } else { + REPORT("Expected Struct, but got " << got->GetKind()); + } + return false; + } + } +} + +namespace NYql::NPureCalc { + const TTypeAnnotationNode* MakeTypeFromSchema(const NYT::TNode& yson, TExprContext& ctx) { + const auto* type = NCommon::ParseTypeFromYson(yson, ctx); + + if (!type) { + ythrow TCompileError("", ctx.IssueManager.GetIssues().ToString()) + << "Incorrect schema: " << NYT::NodeToYsonString(yson, NYson::EYsonFormat::Text); + } + + return type; + } + + const TStructExprType* ExtendStructType( + const TStructExprType* type, const THashMap<TString, NYT::TNode>& extraColumns, TExprContext& ctx) + { + if (extraColumns.empty()) { + return type; + } + + auto items = type->GetItems(); + for (const auto& pair : extraColumns) { + items.push_back(ctx.MakeType<TItemExprType>(pair.first, MakeTypeFromSchema(pair.second, ctx))); + } + + auto result = ctx.MakeType<TStructExprType>(items); + + if (!result->Validate(TPosition(), ctx)) { + ythrow TCompileError("", ctx.IssueManager.GetIssues().ToString()) << "Incorrect extended struct type"; + } + + return result; + } + + bool ValidateInputSchema(const TTypeAnnotationNode* type, TExprContext& ctx) { + TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Input schema"); }); + return CheckSchema(type, nullptr, ctx, false); + } + + bool ValidateOutputSchema(const TTypeAnnotationNode* type, TExprContext& ctx) { + TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Output schema"); }); + return CheckSchema(type, nullptr, ctx, true); + } + + bool ValidateOutputType(const TTypeAnnotationNode* type, const TTypeAnnotationNode* expected, TExprContext& ctx) 
{ + TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Program return type"); }); + return CheckSchema(type, expected, ctx, true); + } +} diff --git a/ydb/library/yql/public/purecalc/common/type_from_schema.h b/ydb/library/yql/public/purecalc/common/type_from_schema.h new file mode 100644 index 0000000000..395777bc6d --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/type_from_schema.h @@ -0,0 +1,36 @@ +#pragma once + +#include <ydb/library/yql/public/purecalc/common/interface.h> + +#include <ydb/library/yql/ast/yql_expr.h> + +#include <library/cpp/yson/node/node.h> + +namespace NYql { + namespace NPureCalc { + /** + * Load struct type from yson. Use methods below to check returned type for correctness. + */ + const TTypeAnnotationNode* MakeTypeFromSchema(const NYT::TNode&, TExprContext&); + + /** + * Extend struct type with additional columns. Type of each extra column is loaded from yson. + */ + const TStructExprType* ExtendStructType(const TStructExprType*, const THashMap<TString, NYT::TNode>&, TExprContext&); + + /** + * Check if the given type can be used as an input schema, i.e. it is a struct. + */ + bool ValidateInputSchema(const TTypeAnnotationNode* type, TExprContext& ctx); + + /** + * Check if the given type can be used as an output schema, i.e. it is a struct or a variant of structs. + */ + bool ValidateOutputSchema(const TTypeAnnotationNode* type, TExprContext& ctx); + + /** + * Check if output type can be silently converted to the expected type. 
+ */ + bool ValidateOutputType(const TTypeAnnotationNode* type, const TTypeAnnotationNode* expected, TExprContext& ctx); + } +} diff --git a/ydb/library/yql/public/purecalc/common/worker.cpp b/ydb/library/yql/public/purecalc/common/worker.cpp new file mode 100644 index 0000000000..b32560f420 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/worker.cpp @@ -0,0 +1,566 @@ +#include "worker.h" +#include "compile_mkql.h" + +#include <ydb/library/yql/ast/yql_expr.h> +#include <ydb/library/yql/core/yql_user_data.h> +#include <ydb/library/yql/core/yql_user_data_storage.h> +#include <ydb/library/yql/providers/common/comp_nodes/yql_factory.h> +#include <ydb/library/yql/public/purecalc/common/names.h> +#include <ydb/library/yql/minikql/mkql_function_registry.h> +#include <ydb/library/yql/minikql/mkql_node.h> +#include <ydb/library/yql/minikql/mkql_node_builder.h> +#include <ydb/library/yql/minikql/mkql_node_cast.h> +#include <ydb/library/yql/minikql/mkql_node_visitor.h> +#include <ydb/library/yql/minikql/mkql_node_serialization.h> +#include <ydb/library/yql/minikql/mkql_program_builder.h> +#include <ydb/library/yql/minikql/comp_nodes/mkql_factories.h> +#include <ydb/library/yql/minikql/computation/mkql_computation_node.h> +#include <ydb/library/yql/minikql/computation/mkql_computation_node_holders.h> +#include <ydb/library/yql/minikql/computation/mkql_computation_node_impl.h> +#include <ydb/library/yql/providers/common/mkql/yql_provider_mkql.h> +#include <ydb/library/yql/providers/common/mkql/yql_type_mkql.h> + +#include <library/cpp/random_provider/random_provider.h> +#include <library/cpp/time_provider/time_provider.h> + +#include <util/stream/file.h> +#include <ydb/library/yql/minikql/computation/mkql_custom_list.h> + +using namespace NYql; +using namespace NYql::NPureCalc; + +TWorkerGraph::TWorkerGraph( + const TExprNode::TPtr& exprRoot, + TExprContext& exprCtx, + const TString& serializedProgram, + const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, + 
const TUserDataTable& userData, + const TVector<const TStructExprType*>& inputTypes, + const TVector<const TStructExprType*>& originalInputTypes, + const TTypeAnnotationNode* outputType, + const TString& LLVMSettings, + NKikimr::NUdf::ICountersProvider* countersProvider, + ui64 nativeYtTypeFlags, + TMaybe<ui64> deterministicTimeProviderSeed +) + : ScopedAlloc_(__LOCATION__, NKikimr::TAlignedPagePoolCounters(), funcRegistry.SupportsSizedAllocators()) + , Env_(ScopedAlloc_) + , FuncRegistry_(funcRegistry) + , RandomProvider_(CreateDefaultRandomProvider()) + , TimeProvider_(deterministicTimeProviderSeed ? + CreateDeterministicTimeProvider(*deterministicTimeProviderSeed) : + CreateDefaultTimeProvider()) + , LLVMSettings_(LLVMSettings) + , NativeYtTypeFlags_(nativeYtTypeFlags) +{ + // Build the root MKQL node + + NKikimr::NMiniKQL::TRuntimeNode rootNode; + if (exprRoot) { + rootNode = CompileMkql(exprRoot, exprCtx, FuncRegistry_, Env_, userData); + } else { + rootNode = NKikimr::NMiniKQL::DeserializeRuntimeNode(serializedProgram, Env_); + } + + // Prepare container for input nodes + + const ui32 inputsCount = inputTypes.size(); + + YQL_ENSURE(inputTypes.size() == originalInputTypes.size()); + + SelfNodes_.resize(inputsCount, nullptr); + + YQL_ENSURE(SelfNodes_.size() == inputsCount); + + // Setup struct types + + NKikimr::NMiniKQL::TProgramBuilder pgmBuilder(Env_, FuncRegistry_); + for (ui32 i = 0; i < inputsCount; ++i) { + const auto* type = static_cast<NKikimr::NMiniKQL::TStructType*>(NCommon::BuildType(TPositionHandle(), *inputTypes[i], pgmBuilder)); + const auto* originalType = type; + if (inputTypes[i] != originalInputTypes[i]) { + YQL_ENSURE(inputTypes[i]->GetSize() >= originalInputTypes[i]->GetSize()); + originalType = static_cast<NKikimr::NMiniKQL::TStructType*>(NCommon::BuildType(TPositionHandle(), *originalInputTypes[i], pgmBuilder)); + } + + InputTypes_.push_back(type); + OriginalInputTypes_.push_back(originalType); + } + + if (outputType) { + OutputType_ = 
NCommon::BuildType(TPositionHandle(), *outputType, pgmBuilder); + } + if (!exprRoot) { + auto outMkqlType = rootNode.GetStaticType(); + if (outMkqlType->GetKind() == NKikimr::NMiniKQL::TType::EKind::List) { + outMkqlType = static_cast<NKikimr::NMiniKQL::TListType*>(outMkqlType)->GetItemType(); + } else if (outMkqlType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Stream) { + outMkqlType = static_cast<NKikimr::NMiniKQL::TStreamType*>(outMkqlType)->GetItemType(); + } else { + ythrow TCompileError("", "") << "unexpected mkql output type " << NKikimr::NMiniKQL::TType::KindAsStr(outMkqlType->GetKind()); + } + if (OutputType_) { + if (!OutputType_->IsSameType(*outMkqlType)) { + ythrow TCompileError("", "") << "precompiled program output type doesn't match the output schema"; + } + } else { + OutputType_ = outMkqlType; + } + } + + // Compile computation pattern + + auto selfCallableName = Env_.InternName(PurecalcInputCallableName); + + NKikimr::NMiniKQL::TExploringNodeVisitor explorer; + explorer.Walk(rootNode.GetNode(), Env_); + + auto compositeNodeFactory = NKikimr::NMiniKQL::GetCompositeWithBuiltinFactory( + {NKikimr::NMiniKQL::GetYqlFactory()} + ); + + auto nodeFactory = [&]( + NKikimr::NMiniKQL::TCallable& callable, const NKikimr::NMiniKQL::TComputationNodeFactoryContext& ctx + ) -> NKikimr::NMiniKQL::IComputationNode* { + if (callable.GetType()->GetNameStr() == selfCallableName) { + YQL_ENSURE(callable.GetInputsCount() == 1, "Self takes exactly 1 argument"); + const auto inputIndex = AS_VALUE(NKikimr::NMiniKQL::TDataLiteral, callable.GetInput(0))->AsValue().Get<ui32>(); + YQL_ENSURE(inputIndex < inputsCount, "Self index is out of range"); + YQL_ENSURE(!SelfNodes_[inputIndex], "Self can be called at most once with each index"); + return SelfNodes_[inputIndex] = new NKikimr::NMiniKQL::TExternalComputationNode(ctx.Mutables); + } + else { + return compositeNodeFactory(callable, ctx); + } + }; + + NKikimr::NMiniKQL::TComputationPatternOpts computationPatternOpts( + 
ScopedAlloc_.Ref(), + Env_, + nodeFactory, + &funcRegistry, + NKikimr::NUdf::EValidateMode::None, + NKikimr::NUdf::EValidatePolicy::Exception, + LLVMSettings, + NKikimr::NMiniKQL::EGraphPerProcess::Multi, + nullptr, + countersProvider); + + ComputationPattern_ = NKikimr::NMiniKQL::MakeComputationPattern( + explorer, + rootNode, + { rootNode.GetNode() }, + computationPatternOpts); + + ComputationGraph_ = ComputationPattern_->Clone( + computationPatternOpts.ToComputationOptions(*RandomProvider_, *TimeProvider_)); + + ComputationGraph_->Prepare(); + + // Scoped alloc acquires itself on construction. We need to release it before returning control to user. + // Note that scoped alloc releases itself on destruction so it is no problem if the above code throws. + ScopedAlloc_.Release(); +} + +TWorkerGraph::~TWorkerGraph() { + // Remember, we've released scoped alloc in constructor? Now, we need to acquire it back before destroying. + ScopedAlloc_.Acquire(); +} + +template <typename TBase> +TWorker<TBase>::TWorker( + TWorkerFactoryPtr factory, + const TExprNode::TPtr& exprRoot, + TExprContext& exprCtx, + const TString& serializedProgram, + const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, + const TUserDataTable& userData, + const TVector<const TStructExprType*>& inputTypes, + const TVector<const TStructExprType*>& originalInputTypes, + const TTypeAnnotationNode* outputType, + const TString& LLVMSettings, + NKikimr::NUdf::ICountersProvider* countersProvider, + ui64 nativeYtTypeFlags, + TMaybe<ui64> deterministicTimeProviderSeed +) + : WorkerFactory_(std::move(factory)) + , Graph_(exprRoot, exprCtx, serializedProgram, funcRegistry, userData, inputTypes, originalInputTypes, outputType, LLVMSettings, + countersProvider, nativeYtTypeFlags, deterministicTimeProviderSeed) +{ +} + +template <typename TBase> +inline ui32 TWorker<TBase>::GetInputsCount() const { + return Graph_.InputTypes_.size(); +} + +template <typename TBase> +inline const NKikimr::NMiniKQL::TStructType* 
TWorker<TBase>::GetInputType(ui32 inputIndex, bool original) const { + const auto& container = original ? Graph_.OriginalInputTypes_ : Graph_.InputTypes_; + + YQL_ENSURE(inputIndex < container.size(), "invalid input index (" << inputIndex << ") in GetInputType call"); + + return container[inputIndex]; +} + +template <typename TBase> +inline const NKikimr::NMiniKQL::TStructType* TWorker<TBase>::GetInputType(bool original) const { + const auto& container = original ? Graph_.OriginalInputTypes_ : Graph_.InputTypes_; + + YQL_ENSURE(container.size() == 1, "GetInputType() can be used only for single-input programs"); + + return container[0]; +} + +template <typename TBase> +inline const NKikimr::NMiniKQL::TType* TWorker<TBase>::GetOutputType() const { + return Graph_.OutputType_; +} + +template <typename TBase> +NYT::TNode TWorker<TBase>::MakeInputSchema(ui32 inputIndex) const { + auto p = WorkerFactory_.lock(); + YQL_ENSURE(p, "Access to destroyed worker factory"); + return p->MakeInputSchema(inputIndex); +} + +template <typename TBase> +NYT::TNode TWorker<TBase>::MakeInputSchema() const { + auto p = WorkerFactory_.lock(); + YQL_ENSURE(p, "Access to destroyed worker factory"); + return p->MakeInputSchema(); +} + +template <typename TBase> +NYT::TNode TWorker<TBase>::MakeOutputSchema() const { + auto p = WorkerFactory_.lock(); + YQL_ENSURE(p, "Access to destroyed worker factory"); + return p->MakeOutputSchema(); +} + +template <typename TBase> +NYT::TNode TWorker<TBase>::MakeOutputSchema(ui32) const { + auto p = WorkerFactory_.lock(); + YQL_ENSURE(p, "Access to destroyed worker factory"); + return p->MakeOutputSchema(); +} + +template <typename TBase> +NYT::TNode TWorker<TBase>::MakeOutputSchema(TStringBuf) const { + auto p = WorkerFactory_.lock(); + YQL_ENSURE(p, "Access to destroyed worker factory"); + return p->MakeOutputSchema(); +} + +template <typename TBase> +NYT::TNode TWorker<TBase>::MakeFullOutputSchema() const { + auto p = WorkerFactory_.lock(); + 
YQL_ENSURE(p, "Access to destroyed worker factory"); + return p->MakeFullOutputSchema(); +} + +template <typename TBase> +inline NKikimr::NMiniKQL::TScopedAlloc& TWorker<TBase>::GetScopedAlloc() { + return Graph_.ScopedAlloc_; +} + +template <typename TBase> +inline NKikimr::NMiniKQL::IComputationGraph& TWorker<TBase>::GetGraph() { + return *Graph_.ComputationGraph_; +} + +template <typename TBase> +inline const NKikimr::NMiniKQL::IFunctionRegistry& +TWorker<TBase>::GetFunctionRegistry() const { + return Graph_.FuncRegistry_; +} + +template <typename TBase> +inline NKikimr::NMiniKQL::TTypeEnvironment& +TWorker<TBase>::GetTypeEnvironment() { + return Graph_.Env_; +} + +template <typename TBase> +inline const TString& TWorker<TBase>::GetLLVMSettings() const { + return Graph_.LLVMSettings_; +} + +template <typename TBase> +inline ui64 TWorker<TBase>::GetNativeYtTypeFlags() const { + return Graph_.NativeYtTypeFlags_; +} + +template <typename TBase> +ITimeProvider* TWorker<TBase>::GetTimeProvider() const { + return Graph_.TimeProvider_.Get(); +} + +template <typename TBase> +void TWorker<TBase>::Release() { + if (auto p = WorkerFactory_.lock()) { + p->ReturnWorker(this); + } else { + delete this; + } +} + +TPullStreamWorker::~TPullStreamWorker() { + auto guard = Guard(GetScopedAlloc()); + Output_.Clear(); +} + +void TPullStreamWorker::SetInput(NKikimr::NUdf::TUnboxedValue&& value, ui32 inputIndex) { + const auto inputsCount = Graph_.SelfNodes_.size(); + + if (Y_UNLIKELY(inputIndex >= inputsCount)) { + ythrow yexception() << "invalid input index (" << inputIndex << ") in SetInput call"; + } + + if (HasInput_.size() < inputsCount) { + HasInput_.resize(inputsCount, false); + } + + if (Y_UNLIKELY(HasInput_[inputIndex])) { + ythrow yexception() << "input value for #" << inputIndex << " input is already set"; + } + + auto selfNode = Graph_.SelfNodes_[inputIndex]; + + if (selfNode) { + YQL_ENSURE(value); + selfNode->SetValue(Graph_.ComputationGraph_->GetContext(), 
std::move(value)); + } + + HasInput_[inputIndex] = true; + /* once CheckAllInputsSet() reports true, the graph value is fetched and stored in Output_ */ + if (CheckAllInputsSet()) { + Output_ = Graph_.ComputationGraph_->GetValue(); + } +} +/* Returns a reference to Output_; throws yexception when CheckAllInputsSet() reports false. */ +NKikimr::NUdf::TUnboxedValue& TPullStreamWorker::GetOutput() { + if (Y_UNLIKELY(!CheckAllInputsSet())) { + ythrow yexception() << "some input values have not been set"; + } + + return Output_; +} +/* Under the allocator lock, resets Output_ and the value of every non-null self node to Invalid; then forgets which inputs were set and defers to TWorker::Release(). */ +void TPullStreamWorker::Release() { + with_lock(GetScopedAlloc()) { + Output_ = NKikimr::NUdf::TUnboxedValue::Invalid(); + for (auto selfNode: Graph_.SelfNodes_) { + if (selfNode) { + selfNode->SetValue(Graph_.ComputationGraph_->GetContext(), NKikimr::NUdf::TUnboxedValue::Invalid()); + } + } + } + HasInput_.clear(); + TWorker<IPullStreamWorker>::Release(); +} +/* Clears Output_ and OutputIterator_ while holding a guard on the worker's scoped allocator. */ +TPullListWorker::~TPullListWorker() { + auto guard = Guard(GetScopedAlloc()); + Output_.Clear(); + OutputIterator_.Clear(); +} +/* Binds `value` to input #inputIndex. Throws yexception for an out-of-range index or when that input has already been set. Once CheckAllInputsSet() reports true, the graph value is stored in Output_ and the output iterator is reset. */ +void TPullListWorker::SetInput(NKikimr::NUdf::TUnboxedValue&& value, ui32 inputIndex) { + const auto inputsCount = Graph_.SelfNodes_.size(); + + if (Y_UNLIKELY(inputIndex >= inputsCount)) { + ythrow yexception() << "invalid input index (" << inputIndex << ") in SetInput call"; + } + /* HasInput_ is sized lazily, on the first call that finds it too short */ + if (HasInput_.size() < inputsCount) { + HasInput_.resize(inputsCount, false); + } + + if (Y_UNLIKELY(HasInput_[inputIndex])) { + ythrow yexception() << "input value for #" << inputIndex << " input is already set"; + } + + auto selfNode = Graph_.SelfNodes_[inputIndex]; + /* a null self node gets no value - presumably the program never reads this input; TODO confirm */ + if (selfNode) { + YQL_ENSURE(value); + selfNode->SetValue(Graph_.ComputationGraph_->GetContext(), std::move(value)); + } + + HasInput_[inputIndex] = true; + + if (CheckAllInputsSet()) { + Output_ = Graph_.ComputationGraph_->GetValue(); + ResetOutputIterator(); + } +} +/* Returns a reference to Output_; throws yexception when CheckAllInputsSet() reports false. */ +NKikimr::NUdf::TUnboxedValue& TPullListWorker::GetOutput() { + if (Y_UNLIKELY(!CheckAllInputsSet())) { + ythrow yexception() << "some input values have not been set"; + } + + return Output_; +} +/* Returns a reference to OutputIterator_; throws yexception when CheckAllInputsSet() reports false. */ +NKikimr::NUdf::TUnboxedValue& TPullListWorker::GetOutputIterator() { + if (Y_UNLIKELY(!CheckAllInputsSet())) { + ythrow
yexception() << "some input values have not been set"; + } + + return OutputIterator_; +} +/* Replaces OutputIterator_ with a new list iterator obtained from Output_; throws yexception when CheckAllInputsSet() reports false. */ +void TPullListWorker::ResetOutputIterator() { + if (Y_UNLIKELY(!CheckAllInputsSet())) { + ythrow yexception() << "some input values have not been set"; + } + + OutputIterator_ = Output_.GetListIterator(); +} +/* Under the allocator lock, resets Output_, OutputIterator_ and the value of every non-null self node to Invalid; then forgets which inputs were set and defers to TWorker::Release(). */ +void TPullListWorker::Release() { + with_lock(GetScopedAlloc()) { + Output_ = NKikimr::NUdf::TUnboxedValue::Invalid(); + OutputIterator_ = NKikimr::NUdf::TUnboxedValue::Invalid(); + + for (auto selfNode: Graph_.SelfNodes_) { + if (selfNode) { + selfNode->SetValue(Graph_.ComputationGraph_->GetContext(), NKikimr::NUdf::TUnboxedValue::Invalid()); + } + } + } + HasInput_.clear(); + TWorker<IPullListWorker>::Release(); +} + +namespace { + /* One-slot input source for push mode: SetValue() stores a single pending item, Fetch() hands it out, SetFinished() makes every later Fetch() report Finish. */ + class TPushStream final: public NKikimr::NMiniKQL::TCustomListValue { + private: + mutable bool HasIterator_ = false; + bool HasValue_ = false; + bool IsFinished_ = false; + NKikimr::NUdf::TUnboxedValue Value_ = NKikimr::NUdf::TUnboxedValue::Invalid(); + + public: + using TCustomListValue::TCustomListValue; + + public: + /* Stores `value` as the pending item; a previously stored item that was not fetched yet is overwritten. */ + void SetValue(NKikimr::NUdf::TUnboxedValue&& value) { + Value_ = std::move(value); + HasValue_ = true; + } + + void SetFinished() { + IsFinished_ = true; + } + /* The object acts as its own iterator, so only one iterator may ever be requested (enforced via the mutable HasIterator_ flag). */ + NKikimr::NUdf::TUnboxedValue GetListIterator() const override { + YQL_ENSURE(!HasIterator_, "only one pass over input is supported"); + HasIterator_ = true; + return NKikimr::NUdf::TUnboxedValuePod(const_cast<TPushStream*>(this)); + } + /* Finish once SetFinished() was called (checked before the pending item), Yield when no item is pending, otherwise moves the pending item into `result` and returns Ok. */ + NKikimr::NUdf::EFetchStatus Fetch(NKikimr::NUdf::TUnboxedValue& result) override { + if (IsFinished_) { + return NKikimr::NUdf::EFetchStatus::Finish; + } else if (!HasValue_) { + return NKikimr::NUdf::EFetchStatus::Yield; + } else { + result = std::move(Value_); + HasValue_ = false; + return NKikimr::NUdf::EFetchStatus::Ok; + } + } + }; +} +/* Fetches items from the graph value and passes each to Consumer_->OnObject() until Fetch() returns anything other than Ok. */ +void TPushStreamWorker::FeedToConsumer() { + auto value = Graph_.ComputationGraph_->GetValue(); + + for (;;) { + NKikimr::NUdf::TUnboxedValue item; + auto status = value.Fetch(item); + +
if (status != NKikimr::NUdf::EFetchStatus::Ok) { /* any status other than Ok (Yield or Finish) stops the drain */ + break; + } + + Consumer_->OnObject(&item); + } +} +/* Installs the consumer (allowed only once) under the allocator guard. At most one input is accepted; when the input's self node is non-null, a fresh TPushStream is bound to it as the input value. Ends by forwarding whatever output is already available. */ +void TPushStreamWorker::SetConsumer(THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>> consumer) { + auto guard = Guard(GetScopedAlloc()); + const auto inputsCount = Graph_.SelfNodes_.size(); + + YQL_ENSURE(inputsCount < 2, "push stream mode doesn't support several inputs"); + YQL_ENSURE(!Consumer_, "consumer is already set"); + + Consumer_ = std::move(consumer); + + if (inputsCount == 1) { + SelfNode_ = Graph_.SelfNodes_[0]; + } + + if (SelfNode_) { + SelfNode_->SetValue( + Graph_.ComputationGraph_->GetContext(), + Graph_.ComputationGraph_->GetHolderFactory().Create<TPushStream>()); + } + + FeedToConsumer(); +} +/* Stores `value` in the TPushStream bound to the input node (if any) and forwards the output that becomes available. Requires a consumer and no earlier OnFinish(). NOTE(review): unlike SetConsumer() no allocator guard is taken here - presumably the caller holds it; confirm. */ +void TPushStreamWorker::Push(NKikimr::NUdf::TUnboxedValue&& value) { + YQL_ENSURE(Consumer_, "consumer is not set"); + YQL_ENSURE(!Finished_, "OnFinish has already been sent to the consumer; no new values can be pushed"); + + if (Y_LIKELY(SelfNode_)) { + static_cast<TPushStream*>(SelfNode_->GetValue(Graph_.ComputationGraph_->GetContext()).AsBoxed().Get())->SetValue(std::move(value)); + } + + FeedToConsumer(); +} +/* Marks the bound TPushStream finished, forwards the remaining output, then calls Consumer_->OnFinish(); Finished_ makes any later Push()/OnFinish() fail. */ +void TPushStreamWorker::OnFinish() { + YQL_ENSURE(Consumer_, "consumer is not set"); + YQL_ENSURE(!Finished_, "already finished"); + + if (Y_LIKELY(SelfNode_)) { + static_cast<TPushStream*>(SelfNode_->GetValue(Graph_.ComputationGraph_->GetContext()).AsBoxed().Get())->SetFinished(); + } + + FeedToConsumer(); + + Consumer_->OnFinish(); + + Finished_ = true; +} +/* Under the allocator lock, destroys the consumer and resets the bound input value to Invalid; then clears Finished_ and defers to TWorker::Release(). */ +void TPushStreamWorker::Release() { + with_lock(GetScopedAlloc()) { + Consumer_.Destroy(); + if (SelfNode_) { + SelfNode_->SetValue(Graph_.ComputationGraph_->GetContext(), NKikimr::NUdf::TUnboxedValue::Invalid()); + } + SelfNode_ = nullptr; + } + Finished_ = false; + TWorker<IPushStreamWorker>::Release(); +} + +/* Explicit instantiations of the TWorker template follow. */ +namespace NYql { + namespace NPureCalc { + template + class TWorker<IPullStreamWorker>; + + template + class TWorker<IPullListWorker>; + + template + class
TWorker<IPushStreamWorker>; + } +} diff --git a/ydb/library/yql/public/purecalc/common/worker.h b/ydb/library/yql/public/purecalc/common/worker.h new file mode 100644 index 0000000000..4d1f0889db --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/worker.h @@ -0,0 +1,168 @@ +#pragma once + +#include <ydb/library/yql/public/purecalc/common/interface.h> + +#include <ydb/library/yql/public/udf/udf_value.h> +#include <ydb/library/yql/ast/yql_expr.h> +#include <ydb/library/yql/core/yql_user_data.h> +#include <ydb/library/yql/minikql/mkql_alloc.h> +#include <ydb/library/yql/minikql/mkql_node.h> +#include <ydb/library/yql/minikql/mkql_node_visitor.h> +#include <ydb/library/yql/minikql/computation/mkql_computation_node.h> +#include <ydb/library/yql/providers/common/mkql/yql_provider_mkql.h> + +#include <memory> + +namespace NYql { + namespace NPureCalc { + struct TWorkerGraph { + TWorkerGraph( + const TExprNode::TPtr& exprRoot, + TExprContext& exprCtx, + const TString& serializedProgram, + const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, + const TUserDataTable& userData, + const TVector<const TStructExprType*>& inputTypes, + const TVector<const TStructExprType*>& originalInputTypes, + const TTypeAnnotationNode* outputType, + const TString& LLVMSettings, + NKikimr::NUdf::ICountersProvider* countersProvider, + ui64 nativeYtTypeFlags, + TMaybe<ui64> deterministicTimeProviderSeed + ); + + ~TWorkerGraph(); + + NKikimr::NMiniKQL::TScopedAlloc ScopedAlloc_; + NKikimr::NMiniKQL::TTypeEnvironment Env_; + const NKikimr::NMiniKQL::IFunctionRegistry& FuncRegistry_; + TIntrusivePtr<IRandomProvider> RandomProvider_; + TIntrusivePtr<ITimeProvider> TimeProvider_; + NKikimr::NMiniKQL::IComputationPattern::TPtr ComputationPattern_; + THolder<NKikimr::NMiniKQL::IComputationGraph> ComputationGraph_; + TString LLVMSettings_; + ui64 NativeYtTypeFlags_; + TMaybe<TString> TimestampColumn_; + const NKikimr::NMiniKQL::TType* OutputType_; + 
TVector<NKikimr::NMiniKQL::IComputationExternalNode*> SelfNodes_; + TVector<const NKikimr::NMiniKQL::TStructType*> InputTypes_; + TVector<const NKikimr::NMiniKQL::TStructType*> OriginalInputTypes_; + }; + + template <typename TBase> + class TWorker: public TBase { + public: + using TWorkerFactoryPtr = std::weak_ptr<IWorkerFactory>; + private: + // Worker factory implementation should stay alive for this worker to operate correctly. + TWorkerFactoryPtr WorkerFactory_; + + protected: + TWorkerGraph Graph_; + + public: + TWorker( + TWorkerFactoryPtr factory, + const TExprNode::TPtr& exprRoot, + TExprContext& exprCtx, + const TString& serializedProgram, + const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, + const TUserDataTable& userData, + const TVector<const TStructExprType*>& inputTypes, + const TVector<const TStructExprType*>& originalInputTypes, + const TTypeAnnotationNode* outputType, + const TString& LLVMSettings, + NKikimr::NUdf::ICountersProvider* countersProvider, + ui64 nativeYtTypeFlags, + TMaybe<ui64> deterministicTimeProviderSeed + ); + + public: + ui32 GetInputsCount() const override; + const NKikimr::NMiniKQL::TStructType* GetInputType(ui32, bool) const override; + const NKikimr::NMiniKQL::TStructType* GetInputType(bool) const override; + const NKikimr::NMiniKQL::TType* GetOutputType() const override; + NYT::TNode MakeInputSchema() const override; + NYT::TNode MakeInputSchema(ui32) const override; + NYT::TNode MakeOutputSchema() const override; + NYT::TNode MakeOutputSchema(ui32) const override; + NYT::TNode MakeOutputSchema(TStringBuf) const override; + NYT::TNode MakeFullOutputSchema() const override; + NKikimr::NMiniKQL::TScopedAlloc& GetScopedAlloc() override; + NKikimr::NMiniKQL::IComputationGraph& GetGraph() override; + const NKikimr::NMiniKQL::IFunctionRegistry& GetFunctionRegistry() const override; + NKikimr::NMiniKQL::TTypeEnvironment& GetTypeEnvironment() override; + const TString& GetLLVMSettings() const override; + ui64 
GetNativeYtTypeFlags() const override; + ITimeProvider* GetTimeProvider() const override; + protected: + void Release() override; + }; + /* Pull-stream worker: inputs are supplied with SetInput(), the computed value is read through GetOutput(). */ + class TPullStreamWorker final: public TWorker<IPullStreamWorker> { + private: + NKikimr::NUdf::TUnboxedValue Output_ = NKikimr::NUdf::TUnboxedValue::Invalid(); + TVector<bool> HasInput_; + /* True only when a flag exists for every graph input and all flags are set. The size check is needed because HasInput_ stays empty until the first SetInput() call and AllOf over an empty vector is true. */ + inline bool CheckAllInputsSet() { + return HasInput_.size() == Graph_.SelfNodes_.size() && AllOf(HasInput_, [](bool x) { return x; }); + } + + public: + using TWorker::TWorker; + ~TPullStreamWorker(); + + public: + void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) override; + NKikimr::NUdf::TUnboxedValue& GetOutput() override; + + protected: + void Release() override; + }; + /* Pull-list worker: like the pull-stream worker, but also keeps a resettable iterator over the output value. */ + class TPullListWorker final: public TWorker<IPullListWorker> { + private: + NKikimr::NUdf::TUnboxedValue Output_ = NKikimr::NUdf::TUnboxedValue::Invalid(); + NKikimr::NUdf::TUnboxedValue OutputIterator_ = NKikimr::NUdf::TUnboxedValue::Invalid(); + TVector<bool> HasInput_; + /* True only when a flag exists for every graph input and all flags are set; the size check rejects the still-empty HasInput_, for which AllOf alone would be true. */ + inline bool CheckAllInputsSet() { + return HasInput_.size() == Graph_.SelfNodes_.size() && AllOf(HasInput_, [](bool x) { return x; }); + } + + public: + using TWorker::TWorker; + ~TPullListWorker(); + + public: + void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) override; + NKikimr::NUdf::TUnboxedValue& GetOutput() override; + NKikimr::NUdf::TUnboxedValue& GetOutputIterator() override; + void ResetOutputIterator() override; + + protected: + void Release() override; + }; + /* Push-stream worker: values are pushed in one at a time with Push() and results are delivered to the consumer installed by SetConsumer(). */ + class TPushStreamWorker final: public TWorker<IPushStreamWorker> { + private: + THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>> Consumer_{}; + bool Finished_ = false; + NKikimr::NMiniKQL::IComputationExternalNode* SelfNode_ = nullptr; + + public: + using TWorker::TWorker; + + private: + void FeedToConsumer(); + + public: + void SetConsumer(THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>>) override; + void Push(NKikimr::NUdf::TUnboxedValue&&) override; + void OnFinish() override; + + protected: + void Release() override; + }; + } +} diff --git a/ydb/library/yql/public/purecalc/common/worker_factory.cpp
b/ydb/library/yql/public/purecalc/common/worker_factory.cpp new file mode 100644 index 0000000000..223dee8c1b --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/worker_factory.cpp @@ -0,0 +1,454 @@ +#include "worker_factory.h" + +#include "type_from_schema.h" +#include "worker.h" +#include "compile_mkql.h" + +#include <ydb/library/yql/sql/sql.h> +#include <ydb/library/yql/ast/yql_expr.h> +#include <ydb/library/yql/core/yql_expr_optimize.h> +#include <ydb/library/yql/core/yql_type_helpers.h> +#include <ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.h> +#include <ydb/library/yql/providers/common/codec/yql_codec.h> +#include <ydb/library/yql/providers/common/udf_resolve/yql_simple_udf_resolver.h> +#include <ydb/library/yql/providers/common/schema/expr/yql_expr_schema.h> +#include <ydb/library/yql/providers/common/provider/yql_provider.h> +#include <ydb/library/yql/minikql/mkql_node.h> +#include <ydb/library/yql/minikql/mkql_node_serialization.h> +#include <ydb/library/yql/minikql/mkql_alloc.h> +#include <ydb/library/yql/minikql/aligned_page_pool.h> +#include <ydb/library/yql/core/services/yql_transform_pipeline.h> +#include <ydb/library/yql/public/purecalc/common/names.h> +#include <ydb/library/yql/public/purecalc/common/transformations/type_annotation.h> +#include <ydb/library/yql/public/purecalc/common/transformations/align_output_schema.h> +#include <ydb/library/yql/public/purecalc/common/transformations/extract_used_columns.h> +#include <ydb/library/yql/public/purecalc/common/transformations/output_columns_filter.h> +#include <ydb/library/yql/public/purecalc/common/transformations/replace_table_reads.h> +#include <ydb/library/yql/utils/log/log.h> +#include <util/stream/trace.h> + +using namespace NYql; +using namespace NYql::NPureCalc; + +template <typename TBase> +TWorkerFactory<TBase>::TWorkerFactory(TWorkerFactoryOptions options, EProcessorMode processorMode) + : Factory_(std::move(options.Factory)) + , 
FuncRegistry_(std::move(options.FuncRegistry)) + , UserData_(std::move(options.UserData)) + , LLVMSettings_(std::move(options.LLVMSettings)) + , CountersProvider_(options.CountersProvider_) + , NativeYtTypeFlags_(options.NativeYtTypeFlags_) + , DeterministicTimeProviderSeed_(options.DeterministicTimeProviderSeed_) + , UseSystemColumns_(options.UseSystemColumns) + , UseWorkerPool_(options.UseWorkerPool) +{ + // Prepare input struct types and extract all column names from inputs + + const auto& inputSchemas = options.InputSpec.GetSchemas(); + const auto& allVirtualColumns = options.InputSpec.GetAllVirtualColumns(); + + YQL_ENSURE(inputSchemas.size() == allVirtualColumns.size()); + + const auto inputsCount = inputSchemas.size(); + + for (ui32 i = 0; i < inputsCount; ++i) { + const auto* originalInputType = MakeTypeFromSchema(inputSchemas[i], ExprContext_); + if (!ValidateInputSchema(originalInputType, ExprContext_)) { + ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "invalid schema for #" << i << " input"; + } + + const auto* originalStructType = originalInputType->template Cast<TStructExprType>(); + const auto* structType = ExtendStructType(originalStructType, allVirtualColumns[i], ExprContext_); + + InputTypes_.push_back(structType); + OriginalInputTypes_.push_back(originalStructType); + + auto& columnsSet = AllColumns_.emplace_back(); + for (const auto* structItem : structType->GetItems()) { + columnsSet.insert(TString(structItem->GetName())); + + if (!UseSystemColumns_ && structItem->GetName().StartsWith(PurecalcSysColumnsPrefix)) { + ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) + << "#" << i << " input provides system column " << structItem->GetName() + << ", but it is forbidden by options"; + } + } + } + + // Prepare output type + + auto outputSchema = options.OutputSpec.GetSchema(); + if (!outputSchema.IsNull()) { + OutputType_ = MakeTypeFromSchema(outputSchema, ExprContext_); + if 
(!ValidateOutputSchema(OutputType_, ExprContext_)) { + ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "invalid output schema"; + } + } else { + OutputType_ = nullptr; + } + + // Translate + + if (options.TranslationMode_ == ETranslationMode::Mkql) { + SerializedProgram_ = TString{options.Query}; + } else { + ExprRoot_ = Compile(options.Query, ETranslationMode::SQL == options.TranslationMode_, + options.ModuleResolver, options.SyntaxVersion_, options.Modules, options.OutputSpec, processorMode); + + // Deduce output type if it wasn't provided by output spec + + if (!OutputType_) { + OutputType_ = GetSequenceItemType(ExprRoot_->Pos(), ExprRoot_->GetTypeAnn(), true, ExprContext_); + } + if (!OutputType_) { + ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "cannot deduce output schema"; + } + } +} + +template <typename TBase> +TExprNode::TPtr TWorkerFactory<TBase>::Compile( + TStringBuf query, + bool sql, + IModuleResolver::TPtr moduleResolver, + ui16 syntaxVersion, + const THashMap<TString, TString>& modules, + const TOutputSpecBase& outputSpec, + EProcessorMode processorMode +) { + // Prepare type annotation context + + TTypeAnnotationContextPtr typeContext; + + typeContext = MakeIntrusive<TTypeAnnotationContext>(); + typeContext->RandomProvider = CreateDefaultRandomProvider(); + typeContext->TimeProvider = DeterministicTimeProviderSeed_ ? 
+ CreateDeterministicTimeProvider(*DeterministicTimeProviderSeed_) : + CreateDefaultTimeProvider(); + typeContext->UdfResolver = NCommon::CreateSimpleUdfResolver(FuncRegistry_.Get()); + typeContext->UserDataStorage = MakeIntrusive<TUserDataStorage>(nullptr, UserData_, nullptr, nullptr); + typeContext->Modules = moduleResolver; + typeContext->Initialize(ExprContext_); + + if (auto modules = dynamic_cast<TModuleResolver*>(moduleResolver.get())) { + modules->AttachUserData(typeContext->UserDataStorage); + } + + // Parse SQL/s-expr into AST + + TAstParseResult astRes; + + if (sql) { + NSQLTranslation::TTranslationSettings settings; + + typeContext->DeprecatedSQL = (syntaxVersion == 0); + settings.SyntaxVersion = syntaxVersion; + settings.V0Behavior = NSQLTranslation::EV0Behavior::Disable; + settings.Mode = NSQLTranslation::ESqlMode::LIMITED_VIEW; + settings.DefaultCluster = PurecalcDefaultCluster; + settings.ClusterMapping[settings.DefaultCluster] = PurecalcDefaultService; + settings.ModuleMapping = modules; + settings.EnableGenericUdfs = true; + settings.File = "generated.sql"; + for (const auto& [key, block] : UserData_) { + TStringBuf alias(key.Alias()); + if (block.Usage.Test(EUserDataBlockUsage::Library) && !alias.StartsWith("/lib")) { + alias.SkipPrefix("/home/"); + settings.Libraries.emplace(alias); + } + } + + astRes = SqlToYql(TString(query), settings); + } else { + astRes = ParseAst(TString(query)); + } + + if (!astRes.IsOk()) { + ythrow TCompileError(TString(query), astRes.Issues.ToString()) << "failed to parse " << (sql ? 
ETranslationMode::SQL : ETranslationMode::SExpr); + } + + ExprContext_.IssueManager.AddIssues(astRes.Issues); + + if (ETraceLevel::TRACE_DETAIL <= StdDbgLevel()) { + Cdbg << "Before optimization:" << Endl; + astRes.Root->PrettyPrintTo(Cdbg, TAstPrintFlags::PerLine | TAstPrintFlags::ShortQuote | TAstPrintFlags::AdaptArbitraryContent); + } + + // Translate AST into expression + + TExprNode::TPtr exprRoot; + if (!CompileExpr(*astRes.Root, exprRoot, ExprContext_, moduleResolver.get(), 0, syntaxVersion)) { + TStringStream astStr; + astRes.Root->PrettyPrintTo(astStr, TAstPrintFlags::ShortQuote | TAstPrintFlags::PerLine); + ythrow TCompileError(astStr.Str(), ExprContext_.IssueManager.GetIssues().ToString()) << "failed to compile"; + } + + + // Prepare transformation pipeline + THolder<IGraphTransformer> calcTransformer = CreateFunctorTransformer([&](TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) + -> IGraphTransformer::TStatus + { + output = input; + auto valueNode = input->HeadPtr(); + + auto peepHole = MakePeepholeOptimization(typeContext); + auto status = SyncTransform(*peepHole, valueNode, ctx); + if (status != IGraphTransformer::TStatus::Ok) { + return status; + } + + TStringStream out; + NYson::TYsonWriter writer(&out, NYson::EYsonFormat::Text, ::NYson::EYsonType::Node, true); + writer.OnBeginMap(); + + writer.OnKeyedItem("Data"); + + TWorkerGraph graph( + valueNode, + ctx, + {}, + *FuncRegistry_, + UserData_, + {}, + {}, + valueNode->GetTypeAnn(), + LLVMSettings_, + CountersProvider_, + NativeYtTypeFlags_, + DeterministicTimeProviderSeed_ + ); + + with_lock (graph.ScopedAlloc_) { + const auto value = graph.ComputationGraph_->GetValue(); + NCommon::WriteYsonValue(writer, value, const_cast<NKikimr::NMiniKQL::TType*>(graph.OutputType_), nullptr); + } + writer.OnEndMap(); + + auto ysonAtom = ctx.NewAtom(TPositionHandle(), out.Str()); + input->SetResult(std::move(ysonAtom)); + return IGraphTransformer::TStatus::Ok; + }); + + TTransformationPipeline 
pipeline(typeContext); + + pipeline.Add(MakeTableReadsReplacer(InputTypes_.size(), UseSystemColumns_), + "ReplaceTableReads", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR, + "Replace reads from tables"); + pipeline.AddServiceTransformers(); + pipeline.AddPreTypeAnnotation(); + pipeline.AddExpressionEvaluation(*FuncRegistry_, calcTransformer.Get()); + pipeline.AddIOAnnotation(); + pipeline.AddTypeAnnotationTransformer(MakeTypeAnnotationTransformer(typeContext, InputTypes_, processorMode)); + pipeline.AddPostTypeAnnotation(); + pipeline.Add(CreateFunctorTransformer( + [&](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { + return OptimizeExpr(input, output, [](const TExprNode::TPtr& node, TExprContext&) -> TExprNode::TPtr { + if (node->IsCallable("Unordered") && node->Child(0)->IsCallable(PurecalcInputCallableName)) { + return node->ChildPtr(0); + } + return node; + }, ctx, TOptimizeExprSettings(nullptr)); + }), "Unordered", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR, + "Unordered optimizations"); + pipeline.Add(MakeOutputColumnsFilter(outputSpec.GetOutputColumnsFilter()), + "Filter", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR, + "Filter output columns"); + pipeline.Add(MakeOutputAligner(OutputType_, processorMode), + "Convert", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR, + "Align return type of the program to output schema"); + pipeline.AddCommonOptimization(); + pipeline.AddFinalCommonOptimization(); + pipeline.Add(MakeUsedColumnsExtractor(&UsedColumns_, AllColumns_), + "ExtractColumns", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR, + "Extract used columns"); + pipeline.Add(MakePeepholeOptimization(typeContext), + "PeepHole", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR, + "Peephole optimizations"); + pipeline.AddCheckExecution(false); + + // Apply optimizations + + auto transformer = pipeline.Build(); + auto status = SyncTransform(*transformer, exprRoot, ExprContext_); + auto transformStats = 
transformer->GetStatistics(); + TStringStream out; + NYson::TYsonWriter writer(&out, NYson::EYsonFormat::Pretty); + NCommon::TransformerStatsToYson("", transformStats, writer); + YQL_CLOG(DEBUG, Core) << "Transform stats: " << out.Str(); + if (status == IGraphTransformer::TStatus::Error) { + ythrow TCompileError("", ExprContext_.IssueManager.GetIssues().ToString()) << "Failed to optimize"; + } + + if (ETraceLevel::TRACE_DETAIL <= StdDbgLevel()) { + Cdbg << "After optimization:" << Endl; + ConvertToAst(*exprRoot, ExprContext_, 0, true).Root->PrettyPrintTo(Cdbg, TAstPrintFlags::PerLine | TAstPrintFlags::ShortQuote | TAstPrintFlags::AdaptArbitraryContent); + } + return exprRoot; +} + +template <typename TBase> +NYT::TNode TWorkerFactory<TBase>::MakeInputSchema(ui32 inputIndex) const { + Y_ENSURE( + inputIndex < InputTypes_.size(), + "invalid input index (" << inputIndex << ") in MakeInputSchema call"); + + return NCommon::TypeToYsonNode(InputTypes_[inputIndex]); +} + +template <typename TBase> +NYT::TNode TWorkerFactory<TBase>::MakeInputSchema() const { + Y_ENSURE( + InputTypes_.size() == 1, + "MakeInputSchema() can be used only with single-input programs"); + + return NCommon::TypeToYsonNode(InputTypes_[0]); +} + +template <typename TBase> +NYT::TNode TWorkerFactory<TBase>::MakeOutputSchema() const { + Y_ENSURE(OutputType_, "MakeOutputSchema() cannot be used with precompiled programs"); + Y_ENSURE( + OutputType_->GetKind() == ETypeAnnotationKind::Struct, + "MakeOutputSchema() cannot be used with multi-output programs"); + + return NCommon::TypeToYsonNode(OutputType_); +} + +template <typename TBase> +NYT::TNode TWorkerFactory<TBase>::MakeOutputSchema(ui32 index) const { + Y_ENSURE(OutputType_, "MakeOutputSchema() cannot be used with precompiled programs"); + Y_ENSURE( + OutputType_->GetKind() == ETypeAnnotationKind::Variant, + "MakeOutputSchema(ui32) cannot be used with single-output programs"); + + auto vtype = OutputType_->template Cast<TVariantExprType>(); + + 
Y_ENSURE( + vtype->GetUnderlyingType()->GetKind() == ETypeAnnotationKind::Tuple, + "MakeOutputSchema(ui32) cannot be used to process variants over struct"); + + auto ttype = vtype->GetUnderlyingType()->template Cast<TTupleExprType>(); + + Y_ENSURE( + index < ttype->GetSize(), + "Invalid table index " << index); + + return NCommon::TypeToYsonNode(ttype->GetItems()[index]); +} + +template <typename TBase> +NYT::TNode TWorkerFactory<TBase>::MakeOutputSchema(TStringBuf tableName) const { + Y_ENSURE(OutputType_, "MakeOutputSchema() cannot be used with precompiled programs"); + Y_ENSURE( + OutputType_->GetKind() == ETypeAnnotationKind::Variant, + "MakeOutputSchema(TStringBuf) cannot be used with single-output programs"); + + auto vtype = OutputType_->template Cast<TVariantExprType>(); + + Y_ENSURE( + vtype->GetUnderlyingType()->GetKind() == ETypeAnnotationKind::Struct, + "MakeOutputSchema(TStringBuf) cannot be used to process variants over tuple"); + + auto stype = vtype->GetUnderlyingType()->template Cast<TStructExprType>(); + + auto index = stype->FindItem(tableName); + + Y_ENSURE( + index.Defined(), + "Invalid table index " << TString{tableName}.Quote()); + + return NCommon::TypeToYsonNode(stype->GetItems()[*index]->GetItemType()); +} + +template <typename TBase> +NYT::TNode TWorkerFactory<TBase>::MakeFullOutputSchema() const { + Y_ENSURE(OutputType_, "MakeFullOutputSchema() cannot be used with precompiled programs"); + return NCommon::TypeToYsonNode(OutputType_); +} + +template <typename TBase> +const THashSet<TString>& TWorkerFactory<TBase>::GetUsedColumns(ui32 inputIndex) const { + Y_ENSURE( + inputIndex < UsedColumns_.size(), + "invalid input index (" << inputIndex << ") in GetUsedColumns call"); + + return UsedColumns_[inputIndex]; +} + +template <typename TBase> +const THashSet<TString>& TWorkerFactory<TBase>::GetUsedColumns() const { + Y_ENSURE( + UsedColumns_.size() == 1, + "GetUsedColumns() can be used only with single-input programs"); + + return 
UsedColumns_[0]; +} +/* Returns the completed issues accumulated in ExprContext_'s issue manager. */ +template <typename TBase> +TIssues TWorkerFactory<TBase>::GetIssues() const { + return ExprContext_.IssueManager.GetCompletedIssues(); +} +/* Returns the serialized program. When ExprRoot_ is set, it is compiled to MiniKQL in a temporary allocator and type environment on every call and the runtime node is serialized; otherwise SerializedProgram_ is returned unchanged. */ +template <typename TBase> +TString TWorkerFactory<TBase>::GetCompiledProgram() { + if (ExprRoot_) { + NKikimr::NMiniKQL::TScopedAlloc alloc(__LOCATION__, NKikimr::TAlignedPagePoolCounters(), + FuncRegistry_->SupportsSizedAllocators()); + NKikimr::NMiniKQL::TTypeEnvironment env(alloc); + + auto rootNode = CompileMkql(ExprRoot_, ExprContext_, *FuncRegistry_, env, UserData_); + return NKikimr::NMiniKQL::SerializeRuntimeNode(rootNode, env); + } + + return SerializedProgram_; +} +/* Takes ownership of `worker`: it is kept in WorkerPool_ for reuse when UseWorkerPool_ is set, otherwise it is destroyed when `tmp` goes out of scope. */ +template <typename TBase> +void TWorkerFactory<TBase>::ReturnWorker(IWorker* worker) { + THolder<IWorker> tmp(worker); + if (UseWorkerPool_) { + WorkerPool_.push_back(std::move(tmp)); + } +} + +/* Defines T<MODE>WorkerFactory::MakeWorker(): reuses the most recently pooled worker when the pool is non-empty, otherwise constructs a new T<MODE>Worker that holds a weak reference to this factory. NOTE(review): the pooled IWorker* is downcast with a C-style cast; static_cast looks sufficient if I<MODE>Worker derives non-virtually from IWorker - confirm. */ +#define DEFINE_WORKER_MAKER(MODE) \ + TWorkerHolder<I##MODE##Worker> T##MODE##WorkerFactory::MakeWorker() { \ + if (!WorkerPool_.empty()) { \ + auto res = std::move(WorkerPool_.back()); \ + WorkerPool_.pop_back(); \ + return TWorkerHolder<I##MODE##Worker>((I##MODE##Worker *)res.Release()); \ + } \ + return TWorkerHolder<I##MODE##Worker>(new T##MODE##Worker( \ + weak_from_this(), \ + ExprRoot_, \ + ExprContext_, \ + SerializedProgram_, \ + *FuncRegistry_, \ + UserData_, \ + InputTypes_, \ + OriginalInputTypes_, \ + OutputType_, \ + LLVMSettings_, \ + CountersProvider_, \ + NativeYtTypeFlags_, \ + DeterministicTimeProviderSeed_ \ + )); \ + } + +DEFINE_WORKER_MAKER(PullStream) +DEFINE_WORKER_MAKER(PullList) +DEFINE_WORKER_MAKER(PushStream) +/* Explicit instantiations of TWorkerFactory for the three factory interfaces. */ +namespace NYql { + namespace NPureCalc { + template + class TWorkerFactory<IPullStreamWorkerFactory>; + + template + class TWorkerFactory<IPullListWorkerFactory>; + + template + class TWorkerFactory<IPushStreamWorkerFactory>; + } +} diff --git a/ydb/library/yql/public/purecalc/common/worker_factory.h b/ydb/library/yql/public/purecalc/common/worker_factory.h new file mode 100644 index
0000000000..901e20fe88 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/worker_factory.h @@ -0,0 +1,157 @@ +#pragma once + +#include <ydb/library/yql/public/purecalc/common/interface.h> + +#include "processor_mode.h" + +#include <util/generic/ptr.h> +#include <ydb/library/yql/ast/yql_expr.h> +#include <ydb/library/yql/core/yql_user_data.h> +#include <ydb/library/yql/minikql/mkql_function_registry.h> +#include <ydb/library/yql/core/yql_type_annotation.h> +#include <utility> + +namespace NYql { + namespace NPureCalc { + struct TWorkerFactoryOptions { + IProgramFactoryPtr Factory; + const TInputSpecBase& InputSpec; + const TOutputSpecBase& OutputSpec; + TStringBuf Query; + TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry; + IModuleResolver::TPtr ModuleResolver; + const TUserDataTable& UserData; + const THashMap<TString, TString>& Modules; + TString LLVMSettings; + NKikimr::NUdf::ICountersProvider* CountersProvider_; + ETranslationMode TranslationMode_; + ui16 SyntaxVersion_; + ui64 NativeYtTypeFlags_; + TMaybe<ui64> DeterministicTimeProviderSeed_; + bool UseSystemColumns; + bool UseWorkerPool; + + TWorkerFactoryOptions( + IProgramFactoryPtr Factory, + const TInputSpecBase& InputSpec, + const TOutputSpecBase& OutputSpec, + TStringBuf Query, + TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry, + IModuleResolver::TPtr ModuleResolver, + const TUserDataTable& UserData, + const THashMap<TString, TString>& Modules, + TString LLVMSettings, + NKikimr::NUdf::ICountersProvider* CountersProvider, + ETranslationMode translationMode, + ui16 syntaxVersion, + ui64 nativeYtTypeFlags, + TMaybe<ui64> deterministicTimeProviderSeed, + bool useSystemColumns, + bool useWorkerPool + ) + : Factory(std::move(Factory)) + , InputSpec(InputSpec) + , OutputSpec(OutputSpec) + , Query(Query) + , FuncRegistry(std::move(FuncRegistry)) + , ModuleResolver(std::move(ModuleResolver)) + , UserData(UserData) + , Modules(Modules) + , 
LLVMSettings(std::move(LLVMSettings)) + , CountersProvider_(CountersProvider) + , TranslationMode_(translationMode) + , SyntaxVersion_(syntaxVersion) + , NativeYtTypeFlags_(nativeYtTypeFlags) + , DeterministicTimeProviderSeed_(deterministicTimeProviderSeed) + , UseSystemColumns(useSystemColumns) + , UseWorkerPool(useWorkerPool) + { + } + }; + + template <typename TBase> + class TWorkerFactory: public TBase { + private: + IProgramFactoryPtr Factory_; + + protected: + TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry_; + const TUserDataTable& UserData_; + TExprContext ExprContext_; + TExprNode::TPtr ExprRoot_; + TString SerializedProgram_; + TVector<const TStructExprType*> InputTypes_; + TVector<const TStructExprType*> OriginalInputTypes_; + const TTypeAnnotationNode* OutputType_; + TVector<THashSet<TString>> AllColumns_; + TVector<THashSet<TString>> UsedColumns_; + TString LLVMSettings_; + NKikimr::NUdf::ICountersProvider* CountersProvider_; + ui64 NativeYtTypeFlags_; + TMaybe<ui64> DeterministicTimeProviderSeed_; + bool UseSystemColumns_; + bool UseWorkerPool_; + TVector<THolder<IWorker>> WorkerPool_; + + public: + TWorkerFactory(TWorkerFactoryOptions, EProcessorMode); + + public: + NYT::TNode MakeInputSchema(ui32) const override; + NYT::TNode MakeInputSchema() const override; + NYT::TNode MakeOutputSchema() const override; + NYT::TNode MakeOutputSchema(ui32) const override; + NYT::TNode MakeOutputSchema(TStringBuf) const override; + NYT::TNode MakeFullOutputSchema() const override; + const THashSet<TString>& GetUsedColumns(ui32 inputIndex) const override; + const THashSet<TString>& GetUsedColumns() const override; + TIssues GetIssues() const override; + TString GetCompiledProgram() override; + + protected: + void ReturnWorker(IWorker* worker) override; + + private: + TExprNode::TPtr Compile(TStringBuf query, + bool sql, + IModuleResolver::TPtr moduleResolver, + ui16 syntaxVersion, + const THashMap<TString, TString>& modules, + const 
TOutputSpecBase& outputSpec, + EProcessorMode processorMode); + }; + + class TPullStreamWorkerFactory final: public TWorkerFactory<IPullStreamWorkerFactory> { + public: + explicit TPullStreamWorkerFactory(TWorkerFactoryOptions options) + : TWorkerFactory(std::move(options), EProcessorMode::PullStream) + { + } + + public: + TWorkerHolder<IPullStreamWorker> MakeWorker() override; + }; + + class TPullListWorkerFactory final: public TWorkerFactory<IPullListWorkerFactory> { + public: + explicit TPullListWorkerFactory(TWorkerFactoryOptions options) + : TWorkerFactory(std::move(options), EProcessorMode::PullList) + { + } + + public: + TWorkerHolder<IPullListWorker> MakeWorker() override; + }; + + class TPushStreamWorkerFactory final: public TWorkerFactory<IPushStreamWorkerFactory> { + public: + explicit TPushStreamWorkerFactory(TWorkerFactoryOptions options) + : TWorkerFactory(std::move(options), EProcessorMode::PushStream) + { + } + + public: + TWorkerHolder<IPushStreamWorker> MakeWorker() override; + }; + } +} diff --git a/ydb/library/yql/public/purecalc/common/wrappers.cpp b/ydb/library/yql/public/purecalc/common/wrappers.cpp new file mode 100644 index 0000000000..c808d7b394 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/wrappers.cpp @@ -0,0 +1 @@ +#include "wrappers.h" diff --git a/ydb/library/yql/public/purecalc/common/wrappers.h b/ydb/library/yql/public/purecalc/common/wrappers.h new file mode 100644 index 0000000000..4d65e01271 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/wrappers.h @@ -0,0 +1,70 @@ +#pragma once + +#include "fwd.h" + +#include <util/generic/ptr.h> + +namespace NYql::NPureCalc::NPrivate { + template <typename TNew, typename TOld, typename TFunctor> + class TMappingStream final: public IStream<TNew> { + private: + THolder<IStream<TOld>> Old_; + TFunctor Functor_; + + public: + TMappingStream(THolder<IStream<TOld>> old, TFunctor functor) + : Old_(std::move(old)) + , Functor_(std::move(functor)) + { + } + + public: + TNew 
Fetch() override { + return Functor_(Old_->Fetch()); + } + }; + + template <typename TNew, typename TOld, typename TFunctor> + class TMappingConsumer final: public IConsumer<TNew> { + private: + THolder<IConsumer<TOld>> Old_; + TFunctor Functor_; + + public: + TMappingConsumer(THolder<IConsumer<TOld>> old, TFunctor functor) + : Old_(std::move(old)) + , Functor_(std::move(functor)) + { + } + + public: + void OnObject(TNew object) override { + Old_->OnObject(Functor_(object)); + } + + void OnFinish() override { + Old_->OnFinish(); + } + }; + + template <typename T, typename C> + class TNonOwningConsumer final: public IConsumer<T> { + private: + C Consumer; + + public: + explicit TNonOwningConsumer(const C& consumer) + : Consumer(consumer) + { + } + + public: + void OnObject(T t) override { + Consumer->OnObject(t); + } + + void OnFinish() override { + Consumer->OnFinish(); + } + }; +} diff --git a/ydb/library/yql/public/purecalc/common/ya.make b/ydb/library/yql/public/purecalc/common/ya.make new file mode 100644 index 0000000000..0994915641 --- /dev/null +++ b/ydb/library/yql/public/purecalc/common/ya.make @@ -0,0 +1,47 @@ +LIBRARY() + +SRCS( + compile_mkql.cpp + fwd.cpp + inspect_input.cpp + interface.cpp + logger_init.cpp + names.cpp + processor_mode.cpp + program_factory.cpp + transformations/align_output_schema.cpp + transformations/extract_used_columns.cpp + transformations/output_columns_filter.cpp + transformations/replace_table_reads.cpp + transformations/type_annotation.cpp + type_from_schema.cpp + worker.cpp + worker_factory.cpp + wrappers.cpp +) + +PEERDIR( + ydb/library/yql/sql/pg + ydb/library/yql/ast + ydb/library/yql/core/services + ydb/library/yql/core/services/mounts + ydb/library/yql/core/user_data + ydb/library/yql/minikql/comp_nodes/llvm + ydb/library/yql/utils/backtrace + ydb/library/yql/utils/log + ydb/library/yql/core + ydb/library/yql/core/type_ann + ydb/library/yql/parser/pg_wrapper + ydb/library/yql/providers/common/codec + 
ydb/library/yql/providers/common/comp_nodes + ydb/library/yql/providers/common/mkql + ydb/library/yql/providers/common/provider + ydb/library/yql/providers/common/schema/expr + ydb/library/yql/providers/common/udf_resolve +) + +YQL_LAST_ABI_VERSION() + +GENERATE_ENUM_SERIALIZATION(interface.h) + +END() diff --git a/ydb/library/yql/public/purecalc/examples/CMakeLists.txt b/ydb/library/yql/public/purecalc/examples/CMakeLists.txt new file mode 100644 index 0000000000..ad9eebe96e --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/CMakeLists.txt @@ -0,0 +1,11 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(protobuf) +add_subdirectory(protobuf_pull_list) +add_subdirectory(skiff_pull_list) diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..711c146299 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,64 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_executable(protobuf) +target_compile_options(protobuf PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(protobuf PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + yql-public-purecalc + purecalc-io_specs-protobuf + purecalc-helpers-stream + contrib-libs-protobuf +) +target_link_options(protobuf PRIVATE + -Wl,-platform_version,macos,11.0,11.0 + -fPIC + -fPIC + -framework + CoreFoundation +) +target_proto_messages(protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.proto +) +target_sources(protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp +) +target_allocator(protobuf + system_allocator +) +target_proto_addincls(protobuf + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(protobuf + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) +vcs_info(protobuf) diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..6bc0ca6ea0 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.linux-aarch64.txt @@ -0,0 +1,67 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. 
Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_executable(protobuf) +target_compile_options(protobuf PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(protobuf PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yql-public-purecalc + purecalc-io_specs-protobuf + purecalc-helpers-stream + contrib-libs-protobuf +) +target_link_options(protobuf PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_proto_messages(protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.proto +) +target_sources(protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp +) +target_allocator(protobuf + cpp-malloc-jemalloc +) +target_proto_addincls(protobuf + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(protobuf + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) +vcs_info(protobuf) diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..9a176f6229 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.linux-x86_64.txt @@ -0,0 +1,69 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_executable(protobuf) +target_compile_options(protobuf PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(protobuf PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + yql-public-purecalc + purecalc-io_specs-protobuf + purecalc-helpers-stream + contrib-libs-protobuf +) +target_link_options(protobuf PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_proto_messages(protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.proto +) +target_sources(protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp +) +target_allocator(protobuf + cpp-malloc-tcmalloc + libs-tcmalloc-no_percpu_cache +) +target_proto_addincls(protobuf + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(protobuf + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) +vcs_info(protobuf) diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.txt b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..d6221c45d2 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf/CMakeLists.windows-x86_64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_executable(protobuf) +target_compile_options(protobuf PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(protobuf PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + yql-public-purecalc + purecalc-io_specs-protobuf + purecalc-helpers-stream + contrib-libs-protobuf +) +target_proto_messages(protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.proto +) +target_sources(protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp +) +target_allocator(protobuf + system_allocator +) +target_proto_addincls(protobuf + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(protobuf + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) +vcs_info(protobuf) diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp b/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp new file mode 100644 index 0000000000..8ce3692766 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf/main.cpp @@ -0,0 +1,133 @@ +#include <ydb/library/yql/public/purecalc/examples/protobuf/main.pb.h> + +#include <ydb/library/yql/public/purecalc/purecalc.h> +#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h> +#include <ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h> + +using namespace NYql::NPureCalc; +using namespace NExampleProtos; + +void PullStreamExample(IProgramFactoryPtr); +void PushStreamExample(IProgramFactoryPtr); +void PrecompileExample(IProgramFactoryPtr factory); 
+THolder<IStream<TInput*>> MakeInput(); + +class TConsumer: public IConsumer<TOutput*> { +public: + void OnObject(TOutput* message) override { + Cout << "path = " << message->GetPath() << Endl; + Cout << "host = " << message->GetHost() << Endl; + } + + void OnFinish() override { + Cout << "end" << Endl; + } +}; + +const char* Query = R"( + $a = (SELECT * FROM Input); + $b = (SELECT CAST(Url::GetTail(Url) AS Utf8) AS Path, CAST(Url::GetHost(Url) AS Utf8) AS Host, Ip FROM $a); + $c = (SELECT Path, Host FROM $b WHERE Path IS NOT NULL AND Host IS NOT NULL AND Ip::IsIPv4(Ip::FromString(Ip))); + $d = (SELECT Unwrap(Path) AS Path, Unwrap(Host) AS Host FROM $c); + SELECT * FROM $d; +)"; + +int main(int argc, char** argv) { + try { + auto factory = MakeProgramFactory( + TProgramFactoryOptions().SetUDFsDir(argc > 1 ? argv[1] : "../../../../udfs")); + + Cout << "Pull stream:" << Endl; + PullStreamExample(factory); + + Cout << Endl; + Cout << "Push stream:" << Endl; + PushStreamExample(factory); + + Cout << Endl; + Cout << "Pull stream with pre-compilation:" << Endl; + PrecompileExample(factory); + } catch (const TCompileError& err) { + Cerr << err.GetIssues() << Endl; + Cerr << err.what() << Endl; + } +} + +void PullStreamExample(IProgramFactoryPtr factory) { + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<TInput>(), + TProtobufOutputSpec<TOutput>(), + Query, + ETranslationMode::SQL); + + auto result = program->Apply(MakeInput()); + + while (auto* message = result->Fetch()) { + Cout << "path = " << message->GetPath() << Endl; + Cout << "host = " << message->GetHost() << Endl; + } +} + +void PushStreamExample(IProgramFactoryPtr factory) { + auto program = factory->MakePushStreamProgram( + TProtobufInputSpec<TInput>(), + TProtobufOutputSpec<TOutput>(), + Query, + ETranslationMode::SQL); + + auto consumer = program->Apply(MakeHolder<TConsumer>()); + + auto input = MakeInput(); + while (auto* message = input->Fetch()) { + consumer->OnObject(message); + } + 
consumer->OnFinish(); +} + +void PrecompileExample(IProgramFactoryPtr factory) { + TString prg; + { + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<TInput>(), + TProtobufOutputSpec<TOutput>(), + Query, + ETranslationMode::SQL); + + prg = program->GetCompiledProgram(); + } + + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<TInput>(), + TProtobufOutputSpec<TOutput>(), + prg, + ETranslationMode::Mkql); + + auto result = program->Apply(MakeInput()); + + while (auto* message = result->Fetch()) { + Cout << "path = " << message->GetPath() << Endl; + Cout << "host = " << message->GetHost() << Endl; + } +} + +THolder<IStream<TInput*>> MakeInput() { + TVector<TInput> input; + + { + auto& message = input.emplace_back(); + message.SetUrl("https://news.yandex.ru/Moscow/index.html?from=index"); + message.SetIp("83.220.231.160"); + } + { + auto& message = input.emplace_back(); + message.SetUrl("https://music.yandex.ru/radio/"); + message.SetIp("83.220.231.161"); + } + { + auto& message = input.emplace_back(); + message.SetUrl("https://yandex.ru/maps/?ll=141.475401%2C11.581666&spn=1.757813%2C1.733096&z=7&l=map%2Cstv%2Csta&mode=search&panorama%5Bpoint%5D=141.476317%2C11.582710&panorama%5Bdirection%5D=177.241445%2C-15.219821&panorama%5Bspan%5D=107.410156%2C61.993317"); + message.SetIp("::ffff:77.75.155.3"); + } + + return StreamFromVector(std::move(input)); +} diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/main.proto b/ydb/library/yql/public/purecalc/examples/protobuf/main.proto new file mode 100644 index 0000000000..54fd15e226 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf/main.proto @@ -0,0 +1,11 @@ +package NExampleProtos; + +message TInput { + required string Url = 1; + required string Ip = 2; +} + +message TOutput { + required string Path = 1; + required string Host = 2; +} diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/exectest.run_protobuf_/log.out 
b/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/exectest.run_protobuf_/log.out new file mode 100644 index 0000000000..1ec34e485d --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/exectest.run_protobuf_/log.out @@ -0,0 +1,18 @@ +Pull stream: +path = /Moscow/index.html?from=index +host = news.yandex.ru +path = /radio/ +host = music.yandex.ru + +Push stream: +path = /Moscow/index.html?from=index +host = news.yandex.ru +path = /radio/ +host = music.yandex.ru +end + +Pull stream with pre-compilation: +path = /Moscow/index.html?from=index +host = news.yandex.ru +path = /radio/ +host = music.yandex.ru diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/result.json b/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/result.json new file mode 100644 index 0000000000..96a5814765 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf/ut/canondata/result.json @@ -0,0 +1,5 @@ +{ + "exectest.run[protobuf]": { + "uri": "file://exectest.run_protobuf_/log.out" + } +} diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/ut/ya.make b/ydb/library/yql/public/purecalc/examples/protobuf/ut/ya.make new file mode 100644 index 0000000000..04c2feeba0 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf/ut/ya.make @@ -0,0 +1,11 @@ +EXECTEST() + +RUN(protobuf ${ARCADIA_BUILD_ROOT}/yql/udfs STDOUT log.out CANONIZE_LOCALLY log.out) + +DEPENDS( + ydb/library/yql/public/purecalc/examples/protobuf + yql/udfs/common/url + yql/udfs/common/ip +) + +END() diff --git a/ydb/library/yql/public/purecalc/examples/protobuf/ya.make b/ydb/library/yql/public/purecalc/examples/protobuf/ya.make new file mode 100644 index 0000000000..a03f259bff --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf/ya.make @@ -0,0 +1,27 @@ +PROGRAM() + +SRCS( + main.proto + main.cpp +) + +PEERDIR( + ydb/library/yql/public/purecalc + ydb/library/yql/public/purecalc/io_specs/protobuf + 
ydb/library/yql/public/purecalc/helpers/stream +) + + + YQL_LAST_ABI_VERSION() + + +END() + +RECURSE_ROOT_RELATIVE( + yql/udfs/common/url + yql/udfs/common/ip +) + +RECURSE_FOR_TESTS( + ut +) diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..d33d5a41fa --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,64 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_executable(protobuf_pull_list) +target_compile_options(protobuf_pull_list PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(protobuf_pull_list PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + yql-public-purecalc + purecalc-io_specs-protobuf + purecalc-helpers-stream + contrib-libs-protobuf +) +target_link_options(protobuf_pull_list PRIVATE + -Wl,-platform_version,macos,11.0,11.0 + -fPIC + -fPIC + -framework + CoreFoundation +) +target_proto_messages(protobuf_pull_list PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto +) +target_sources(protobuf_pull_list PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp +) 
+target_allocator(protobuf_pull_list + system_allocator +) +target_proto_addincls(protobuf_pull_list + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(protobuf_pull_list + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) +vcs_info(protobuf_pull_list) diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..efb081bc44 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.linux-aarch64.txt @@ -0,0 +1,67 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_executable(protobuf_pull_list) +target_compile_options(protobuf_pull_list PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(protobuf_pull_list PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yql-public-purecalc + purecalc-io_specs-protobuf + purecalc-helpers-stream + contrib-libs-protobuf +) +target_link_options(protobuf_pull_list PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_proto_messages(protobuf_pull_list PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto +) +target_sources(protobuf_pull_list PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp +) +target_allocator(protobuf_pull_list + cpp-malloc-jemalloc +) +target_proto_addincls(protobuf_pull_list + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(protobuf_pull_list + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) +vcs_info(protobuf_pull_list) diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..f27cca0d51 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.linux-x86_64.txt @@ -0,0 +1,69 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_executable(protobuf_pull_list) +target_compile_options(protobuf_pull_list PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(protobuf_pull_list PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + yql-public-purecalc + purecalc-io_specs-protobuf + purecalc-helpers-stream + contrib-libs-protobuf +) +target_link_options(protobuf_pull_list PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_proto_messages(protobuf_pull_list PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto +) +target_sources(protobuf_pull_list PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp +) +target_allocator(protobuf_pull_list + cpp-malloc-tcmalloc + libs-tcmalloc-no_percpu_cache +) +target_proto_addincls(protobuf_pull_list + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(protobuf_pull_list + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) +vcs_info(protobuf_pull_list) diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.txt b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.txt new 
file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..e74c8255b6 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/CMakeLists.windows-x86_64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_executable(protobuf_pull_list) +target_compile_options(protobuf_pull_list PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(protobuf_pull_list PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + yql-public-purecalc + purecalc-io_specs-protobuf + purecalc-helpers-stream + contrib-libs-protobuf +) +target_proto_messages(protobuf_pull_list PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto +) +target_sources(protobuf_pull_list PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp +) +target_allocator(protobuf_pull_list + system_allocator +) +target_proto_addincls(protobuf_pull_list + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(protobuf_pull_list + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) +vcs_info(protobuf_pull_list) diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp new file mode 100644 index 0000000000..f10c2aa9be --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.cpp @@ -0,0 +1,75 @@ +#include <ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.pb.h> + +#include <ydb/library/yql/public/purecalc/purecalc.h> +#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h> +#include <ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h> + +using namespace NYql::NPureCalc; +using namespace NExampleProtos; + +const char* 
Query = R"( + SELECT + Url, + COUNT(*) AS Hits + FROM + Input + GROUP BY + Url + ORDER BY + Url +)"; + +THolder<IStream<TInput*>> MakeInput(); + +int main() { + try { + auto factory = MakeProgramFactory(); + + auto program = factory->MakePullListProgram( + TProtobufInputSpec<TInput>(), + TProtobufOutputSpec<TOutput>(), + Query, + ETranslationMode::SQL + ); + + auto result = program->Apply(MakeInput()); + + while (auto* message = result->Fetch()) { + Cout << "url = " << message->GetUrl() << Endl; + Cout << "hits = " << message->GetHits() << Endl; + } + } catch (TCompileError& e) { + Cout << e.GetIssues(); + } +} + +THolder<IStream<TInput*>> MakeInput() { + TVector<TInput> input; + + { + auto& message = input.emplace_back(); + message.SetUrl("https://yandex.ru/a"); + } + { + auto& message = input.emplace_back(); + message.SetUrl("https://yandex.ru/a"); + } + { + auto& message = input.emplace_back(); + message.SetUrl("https://yandex.ru/b"); + } + { + auto& message = input.emplace_back(); + message.SetUrl("https://yandex.ru/c"); + } + { + auto& message = input.emplace_back(); + message.SetUrl("https://yandex.ru/b"); + } + { + auto& message = input.emplace_back(); + message.SetUrl("https://yandex.ru/b"); + } + + return StreamFromVector(std::move(input)); +} diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto new file mode 100644 index 0000000000..2766c4b8c0 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/main.proto @@ -0,0 +1,10 @@ +package NExampleProtos; + +message TInput { + required string Url = 1; +} + +message TOutput { + required string Url = 1; + required uint64 Hits = 2; +} diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/exectest.run_protobuf_pull_list_/log.out b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/exectest.run_protobuf_pull_list_/log.out new file mode 
100644 index 0000000000..0a799ed4b0 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/exectest.run_protobuf_pull_list_/log.out @@ -0,0 +1,6 @@ +url = https://yandex.ru/a +hits = 2 +url = https://yandex.ru/b +hits = 3 +url = https://yandex.ru/c +hits = 1 diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/result.json b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/result.json new file mode 100644 index 0000000000..668467cc85 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/canondata/result.json @@ -0,0 +1,6 @@ +{ + "exectest.run[protobuf_pull_list]": { + "checksum": "29bf513fe0ca6f81ae076213a1c7801c", + "uri": "file://exectest.run_protobuf_pull_list_/log.out" + } +}
\ No newline at end of file diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/ya.make b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/ya.make new file mode 100644 index 0000000000..011ee76699 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ut/ya.make @@ -0,0 +1,9 @@ +EXECTEST() + +RUN(protobuf_pull_list STDOUT log.out CANONIZE_LOCALLY log.out) + +DEPENDS( + ydb/library/yql/public/purecalc/examples/protobuf_pull_list +) + +END() diff --git a/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ya.make b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ya.make new file mode 100644 index 0000000000..953ff1bf92 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/protobuf_pull_list/ya.make @@ -0,0 +1,22 @@ +PROGRAM() + +SRCS( + main.proto + main.cpp +) + +PEERDIR( + ydb/library/yql/public/purecalc + ydb/library/yql/public/purecalc/io_specs/protobuf + ydb/library/yql/public/purecalc/helpers/stream +) + + + YQL_LAST_ABI_VERSION() + + +END() + +RECURSE_FOR_TESTS( + ut +) diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..d2cf21e855 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,34 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_executable(skiff_pull_list) +target_compile_options(skiff_pull_list PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(skiff_pull_list PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + yql-public-purecalc + purecalc-io_specs-mkql +) +target_link_options(skiff_pull_list PRIVATE + -Wl,-platform_version,macos,11.0,11.0 + -fPIC + -fPIC + -framework + CoreFoundation +) +target_sources(skiff_pull_list PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp +) +target_allocator(skiff_pull_list + system_allocator +) +vcs_info(skiff_pull_list) diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..596b226847 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.linux-aarch64.txt @@ -0,0 +1,37 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_executable(skiff_pull_list) +target_compile_options(skiff_pull_list PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(skiff_pull_list PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yql-public-purecalc + purecalc-io_specs-mkql +) +target_link_options(skiff_pull_list PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(skiff_pull_list PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp +) +target_allocator(skiff_pull_list + cpp-malloc-jemalloc +) +vcs_info(skiff_pull_list) diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..ebcdf00807 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.linux-x86_64.txt @@ -0,0 +1,39 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_executable(skiff_pull_list) +target_compile_options(skiff_pull_list PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(skiff_pull_list PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + yql-public-purecalc + purecalc-io_specs-mkql +) +target_link_options(skiff_pull_list PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(skiff_pull_list PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp +) +target_allocator(skiff_pull_list + cpp-malloc-tcmalloc + libs-tcmalloc-no_percpu_cache +) +vcs_info(skiff_pull_list) diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.txt b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..84d2ebfdcc --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/CMakeLists.windows-x86_64.txt @@ -0,0 +1,27 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_executable(skiff_pull_list) +target_compile_options(skiff_pull_list PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(skiff_pull_list PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + yql-public-purecalc + purecalc-io_specs-mkql +) +target_sources(skiff_pull_list PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp +) +target_allocator(skiff_pull_list + system_allocator +) +vcs_info(skiff_pull_list) diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp new file mode 100644 index 0000000000..57aa4e0f26 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/main.cpp @@ -0,0 +1,92 @@ + +#include <ydb/library/yql/public/purecalc/purecalc.h> +#include <ydb/library/yql/public/purecalc/io_specs/mkql/spec.h> + +#include <ydb/library/yql/core/user_data/yql_user_data.h> + +#include <util/stream/file.h> +#include <util/datetime/base.h> +#include <library/cpp/yson/node/node.h> +#include <library/cpp/yson/node/node_io.h> + +#include <library/cpp/skiff/skiff.h> + +using namespace NYql::NUserData; +using namespace NYT; +using namespace NYql::NPureCalc; + +const char* Query = R"( + SELECT + Url, + COUNT(*) AS Hits + FROM + Input + GROUP BY + Url + ORDER BY + Hits desc +)"; + +int main() { + auto addField = [&](NYT::TNode& members, const TString& name, const TString& type, const bool isOptional) { + auto typeNode = NYT::TNode::CreateList() + .Add("DataType") + .Add(type); + + if (isOptional) { + typeNode = NYT::TNode::CreateList() + .Add("OptionalType") + .Add(typeNode); + } + + members.Add(NYT::TNode::CreateList() + .Add(name) + .Add(typeNode)); + }; + + NYT::TNode members{NYT::TNode::CreateList()}; + addField(members, "Url", "String", false); + NYT::TNode schema = NYT::TNode::CreateList() + .Add("StructType") + .Add(members); + + Cout << "InputSchema: " << NodeToYsonString(schema) 
<< Endl; + auto inputSpec = TSkiffInputSpec(TVector<NYT::TNode>{schema}); + auto outputSpec = TSkiffOutputSpec({NYT::TNode::CreateEntity()}); + auto factoryOptions = TProgramFactoryOptions(); + factoryOptions.SetNativeYtTypeFlags(0); + factoryOptions.SetLLVMSettings("OFF"); + auto factory = MakeProgramFactory(factoryOptions); + auto program = factory->MakePullListProgram( + inputSpec, + outputSpec, + Query, + ETranslationMode::SQL); + Cout << "OutpSchema: " << NYT::NodeToCanonicalYsonString(program->MakeFullOutputSchema()) << Endl; + TStringStream stream; + NSkiff::TUncheckedSkiffWriter writer{&stream}; + writer.WriteVariant16Tag(0); + writer.WriteString32("https://yandex.ru/a"); + writer.WriteVariant16Tag(0); + writer.WriteString32("https://yandex.ru/a"); + writer.WriteVariant16Tag(0); + writer.WriteString32("https://yandex.ru/b"); + writer.WriteVariant16Tag(0); + writer.WriteString32("https://yandex.ru/c"); + writer.WriteVariant16Tag(0); + writer.WriteString32("https://yandex.ru/b"); + writer.WriteVariant16Tag(0); + writer.WriteString32("https://yandex.ru/b"); + writer.Finish(); + auto input = TStringStream(stream); + auto handle = program->Apply(&input); + TStringStream output; + handle->Run(&output); + auto parser = NSkiff::TUncheckedSkiffParser(&output); + while (parser.HasMoreData()) { + parser.ParseVariant16Tag(); + auto hits = parser.ParseInt64(); + auto url = parser.ParseString32(); + Cout << "URL: " << url << " Hits: " << hits << Endl; + } +} diff --git a/ydb/library/yql/public/purecalc/examples/skiff_pull_list/ya.make b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/ya.make new file mode 100644 index 0000000000..0966d670fe --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/skiff_pull_list/ya.make @@ -0,0 +1,14 @@ +PROGRAM() + +SRCS( + main.cpp +) + +PEERDIR( + ydb/library/yql/public/purecalc + ydb/library/yql/public/purecalc/io_specs/mkql +) + +YQL_LAST_ABI_VERSION() + +END() diff --git 
a/ydb/library/yql/public/purecalc/examples/ya.make b/ydb/library/yql/public/purecalc/examples/ya.make new file mode 100644 index 0000000000..ad5853e9c4 --- /dev/null +++ b/ydb/library/yql/public/purecalc/examples/ya.make @@ -0,0 +1,7 @@ +RECURSE( + protobuf + protobuf/ut + protobuf_pull_list + protobuf_pull_list/ut + skiff_pull_list +) diff --git a/ydb/library/yql/public/purecalc/helpers/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..f83ed3b540 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(protobuf) +add_subdirectory(stream) + +add_library(public-purecalc-helpers INTERFACE) +target_link_libraries(public-purecalc-helpers INTERFACE + contrib-libs-cxxsupp + yutil + purecalc-helpers-protobuf + purecalc-helpers-stream +) diff --git a/ydb/library/yql/public/purecalc/helpers/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/helpers/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..a28bda905c --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/CMakeLists.linux-aarch64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. 
Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(protobuf) +add_subdirectory(stream) + +add_library(public-purecalc-helpers INTERFACE) +target_link_libraries(public-purecalc-helpers INTERFACE + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + purecalc-helpers-protobuf + purecalc-helpers-stream +) diff --git a/ydb/library/yql/public/purecalc/helpers/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..a28bda905c --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/CMakeLists.linux-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(protobuf) +add_subdirectory(stream) + +add_library(public-purecalc-helpers INTERFACE) +target_link_libraries(public-purecalc-helpers INTERFACE + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + purecalc-helpers-protobuf + purecalc-helpers-stream +) diff --git a/ydb/library/yql/public/purecalc/helpers/CMakeLists.txt b/ydb/library/yql/public/purecalc/helpers/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. 
Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/helpers/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..f83ed3b540 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/CMakeLists.windows-x86_64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(protobuf) +add_subdirectory(stream) + +add_library(public-purecalc-helpers INTERFACE) +target_link_libraries(public-purecalc-helpers INTERFACE + contrib-libs-cxxsupp + yutil + purecalc-helpers-protobuf + purecalc-helpers-stream +) diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..0f473a2304 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(purecalc-helpers-protobuf) +target_link_libraries(purecalc-helpers-protobuf PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf + cpp-yson-node + yt_proto-yt-formats +) +target_sources(purecalc-helpers-protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp +) diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..54e7e527ae --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.linux-aarch64.txt @@ -0,0 +1,21 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. 
Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(purecalc-helpers-protobuf) +target_link_libraries(purecalc-helpers-protobuf PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf + cpp-yson-node + yt_proto-yt-formats +) +target_sources(purecalc-helpers-protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp +) diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..54e7e527ae --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.linux-x86_64.txt @@ -0,0 +1,21 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(purecalc-helpers-protobuf) +target_link_libraries(purecalc-helpers-protobuf PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf + cpp-yson-node + yt_proto-yt-formats +) +target_sources(purecalc-helpers-protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp +) diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.txt b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..0f473a2304 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/protobuf/CMakeLists.windows-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(purecalc-helpers-protobuf) +target_link_libraries(purecalc-helpers-protobuf PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf + cpp-yson-node + yt_proto-yt-formats +) +target_sources(purecalc-helpers-protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp +) diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp b/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp new file mode 100644 index 0000000000..6927c46240 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.cpp @@ -0,0 +1,202 @@ +#include "schema_from_proto.h" + +#include <yt/yt_proto/yt/formats/extension.pb.h> + +#include <util/generic/algorithm.h> +#include <util/generic/string.h> +#include <util/string/printf.h> +#include <util/string/vector.h> + +namespace pb = google::protobuf; + +namespace NYql { + namespace NPureCalc { + + TProtoSchemaOptions::TProtoSchemaOptions() + : EnumPolicy(EEnumPolicy::Int32) + , ListIsOptional(false) + { + } + + TProtoSchemaOptions& TProtoSchemaOptions::SetEnumPolicy(EEnumPolicy policy) { + EnumPolicy = policy; + return *this; + } + + TProtoSchemaOptions& TProtoSchemaOptions::SetListIsOptional(bool value) { + ListIsOptional = value; + return *this; + } + + TProtoSchemaOptions& TProtoSchemaOptions::SetFieldRenames( + THashMap<TString, TString> fieldRenames + ) { + FieldRenames = std::move(fieldRenames); + return *this; + } + + namespace { + EEnumFormatType EnumFormatTypeWithYTFlag(const pb::FieldDescriptor& enumField, EEnumFormatType defaultEnumFormatType) { + auto flags = enumField.options().GetRepeatedExtension(NYT::flags); + for (auto flag : flags) { + if (flag == NYT::EWrapperFieldFlag::ENUM_INT) { + return EEnumFormatType::Int32; + } else if (flag == NYT::EWrapperFieldFlag::ENUM_STRING) { + return EEnumFormatType::String; + } + } + return defaultEnumFormatType; + } + } + + EEnumFormatType 
EnumFormatType(const pb::FieldDescriptor& enumField, EEnumPolicy enumPolicy) { + switch (enumPolicy) { + case EEnumPolicy::Int32: + return EEnumFormatType::Int32; + case EEnumPolicy::String: + return EEnumFormatType::String; + case EEnumPolicy::YTFlagDefaultInt32: + return EnumFormatTypeWithYTFlag(enumField, EEnumFormatType::Int32); + case EEnumPolicy::YTFlagDefaultString: + return EnumFormatTypeWithYTFlag(enumField, EEnumFormatType::String); + } + } + + namespace { + const char* FormatTypeName(const pb::FieldDescriptor* field, EEnumPolicy enumPolicy) { + switch (field->type()) { + case pb::FieldDescriptor::TYPE_DOUBLE: + return "Double"; + case pb::FieldDescriptor::TYPE_FLOAT: + return "Float"; + case pb::FieldDescriptor::TYPE_INT64: + case pb::FieldDescriptor::TYPE_SFIXED64: + case pb::FieldDescriptor::TYPE_SINT64: + return "Int64"; + case pb::FieldDescriptor::TYPE_UINT64: + case pb::FieldDescriptor::TYPE_FIXED64: + return "Uint64"; + case pb::FieldDescriptor::TYPE_INT32: + case pb::FieldDescriptor::TYPE_SFIXED32: + case pb::FieldDescriptor::TYPE_SINT32: + return "Int32"; + case pb::FieldDescriptor::TYPE_UINT32: + case pb::FieldDescriptor::TYPE_FIXED32: + return "Uint32"; + case pb::FieldDescriptor::TYPE_BOOL: + return "Bool"; + case pb::FieldDescriptor::TYPE_STRING: + return "Utf8"; + case pb::FieldDescriptor::TYPE_BYTES: + return "String"; + case pb::FieldDescriptor::TYPE_ENUM: + switch (EnumFormatType(*field, enumPolicy)) { + case EEnumFormatType::Int32: + return "Int32"; + case EEnumFormatType::String: + return "String"; + } + default: + ythrow yexception() << "Unsupported protobuf type: " << field->type_name() + << ", field: " << field->name() << ", " << int(field->type()); + } + } + } + + NYT::TNode MakeSchemaFromProto(const pb::Descriptor& descriptor, TVector<const pb::Descriptor*>& nested, const TProtoSchemaOptions& options) { + if (Find(nested, &descriptor) != nested.end()) { + TVector<TString> nestedNames; + for (const auto* d : nested) { + 
nestedNames.push_back(d->full_name()); + } + nestedNames.push_back(descriptor.full_name()); + ythrow yexception() << Sprintf("recursive messages are not supported (%s)", + JoinStrings(nestedNames, "->").c_str()); + } + nested.push_back(&descriptor); + + auto items = NYT::TNode::CreateList(); + for (int fieldNo = 0; fieldNo < descriptor.field_count(); ++fieldNo) { + const auto& fieldDescriptor = *descriptor.field(fieldNo); + + auto name = fieldDescriptor.name(); + if ( + auto renamePtr = options.FieldRenames.FindPtr(name); + nested.size() == 1 && renamePtr + ) { + name = *renamePtr; + } + + NYT::TNode itemType; + if (fieldDescriptor.type() == pb::FieldDescriptor::TYPE_MESSAGE) { + itemType = MakeSchemaFromProto(*fieldDescriptor.message_type(), nested, options); + } else { + itemType = NYT::TNode::CreateList(); + itemType.Add("DataType"); + itemType.Add(FormatTypeName(&fieldDescriptor, options.EnumPolicy)); + } + switch (fieldDescriptor.label()) { + case pb::FieldDescriptor::LABEL_OPTIONAL: + { + auto optionalType = NYT::TNode::CreateList(); + optionalType.Add("OptionalType"); + optionalType.Add(std::move(itemType)); + itemType = std::move(optionalType); + } + break; + case pb::FieldDescriptor::LABEL_REQUIRED: + break; + case pb::FieldDescriptor::LABEL_REPEATED: + { + auto listType = NYT::TNode::CreateList(); + listType.Add("ListType"); + listType.Add(std::move(itemType)); + itemType = std::move(listType); + if (options.ListIsOptional) { + itemType = NYT::TNode::CreateList().Add("OptionalType").Add(std::move(itemType)); + } + } + break; + default: + ythrow yexception() << "Unknown protobuf label: " << (ui32)fieldDescriptor.label() << ", field: " << name; + } + + auto itemNode = NYT::TNode::CreateList(); + itemNode.Add(name); + itemNode.Add(std::move(itemType)); + + items.Add(std::move(itemNode)); + } + auto root = NYT::TNode::CreateList(); + root.Add("StructType"); + root.Add(std::move(items)); + + nested.pop_back(); + return root; + } + + NYT::TNode 
MakeSchemaFromProto(const pb::Descriptor& descriptor, const TProtoSchemaOptions& options) { + TVector<const pb::Descriptor*> nested; + return MakeSchemaFromProto(descriptor, nested, options); + } + + NYT::TNode MakeVariantSchemaFromProtos(const TVector<const pb::Descriptor*>& descriptors, const TProtoSchemaOptions& options) { + Y_ENSURE(options.FieldRenames.empty(), "Renames are not supported in variant mode"); + + auto tupleItems = NYT::TNode::CreateList(); + for (auto descriptor : descriptors) { + tupleItems.Add(MakeSchemaFromProto(*descriptor, options)); + } + + auto tupleType = NYT::TNode::CreateList(); + tupleType.Add("TupleType"); + tupleType.Add(std::move(tupleItems)); + + auto variantType = NYT::TNode::CreateList(); + variantType.Add("VariantType"); + variantType.Add(std::move(tupleType)); + + return variantType; + } + } +} diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.h b/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.h new file mode 100644 index 0000000000..168c654ac7 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.h @@ -0,0 +1,60 @@ +#pragma once + +#include <library/cpp/yson/node/node.h> + +#include <util/generic/hash.h> +#include <util/generic/string.h> + +#include <google/protobuf/descriptor.h> + + +namespace NYql { + namespace NPureCalc { + enum class EEnumPolicy { + Int32, + String, + YTFlagDefaultInt32, + YTFlagDefaultString + }; + + enum class EEnumFormatType { + Int32, + String + }; + + /** + * Options that customize building of struct type from protobuf descriptor. 
+ */ + struct TProtoSchemaOptions { + public: + EEnumPolicy EnumPolicy; + bool ListIsOptional; + THashMap<TString, TString> FieldRenames; + + public: + TProtoSchemaOptions(); + + public: + TProtoSchemaOptions& SetEnumPolicy(EEnumPolicy); + + TProtoSchemaOptions& SetListIsOptional(bool); + + TProtoSchemaOptions& SetFieldRenames( + THashMap<TString, TString> fieldRenames + ); + }; + + EEnumFormatType EnumFormatType(const google::protobuf::FieldDescriptor& enumField, EEnumPolicy enumPolicy); + + /** + * Build struct type from a protobuf descriptor. The returned yson can be loaded into a struct annotation node + * using the ParseTypeFromYson function. + */ + NYT::TNode MakeSchemaFromProto(const google::protobuf::Descriptor&, const TProtoSchemaOptions& = {}); + + /** + * Build variant over tuple type from protobuf descriptors. + */ + NYT::TNode MakeVariantSchemaFromProtos(const TVector<const google::protobuf::Descriptor*>&, const TProtoSchemaOptions& = {}); + } +} diff --git a/ydb/library/yql/public/purecalc/helpers/protobuf/ya.make b/ydb/library/yql/public/purecalc/helpers/protobuf/ya.make new file mode 100644 index 0000000000..11300baba8 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/protobuf/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + schema_from_proto.cpp +) + +PEERDIR( + contrib/libs/protobuf + library/cpp/yson/node + yt/yt_proto/yt/formats + yt/yt_proto/yt/formats +) + +END() diff --git a/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..778b96d4ba --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,21 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(purecalc-helpers-stream) +target_compile_options(purecalc-helpers-stream PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-helpers-stream PUBLIC + contrib-libs-cxxsupp + yutil + public-purecalc-common +) +target_sources(purecalc-helpers-stream PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp +) diff --git a/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..8f1f9643b6 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.linux-aarch64.txt @@ -0,0 +1,22 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(purecalc-helpers-stream) +target_compile_options(purecalc-helpers-stream PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-helpers-stream PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + public-purecalc-common +) +target_sources(purecalc-helpers-stream PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp +) diff --git a/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..8f1f9643b6 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.linux-x86_64.txt @@ -0,0 +1,22 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(purecalc-helpers-stream) +target_compile_options(purecalc-helpers-stream PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-helpers-stream PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + public-purecalc-common +) +target_sources(purecalc-helpers-stream PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp +) diff --git a/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.txt b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..778b96d4ba --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/stream/CMakeLists.windows-x86_64.txt @@ -0,0 +1,21 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(purecalc-helpers-stream) +target_compile_options(purecalc-helpers-stream PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-helpers-stream PUBLIC + contrib-libs-cxxsupp + yutil + public-purecalc-common +) +target_sources(purecalc-helpers-stream PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp +) diff --git a/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp b/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp new file mode 100644 index 0000000000..e1aed5d689 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.cpp @@ -0,0 +1 @@ +#include "stream_from_vector.h" diff --git a/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h b/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h new file mode 100644 index 0000000000..51d8513332 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/stream/stream_from_vector.h @@ -0,0 +1,40 @@ +#pragma once + +#include <ydb/library/yql/public/purecalc/common/interface.h> + +namespace NYql { + namespace NPureCalc { + namespace NPrivate { + template <typename T> + class TVectorStream final: public IStream<T*> { + private: + size_t I_; + TVector<T> Data_; + + public: + explicit TVectorStream(TVector<T> data) + : I_(0) + , Data_(std::move(data)) + { + } + + public: + T* Fetch() override { + if (I_ >= Data_.size()) { + return nullptr; + } else { + return &Data_[I_++]; + } + } + }; + } + + /** + * Convert vector into a purecalc stream. 
+ */ + template <typename T> + THolder<IStream<T*>> StreamFromVector(TVector<T> data) { + return MakeHolder<NPrivate::TVectorStream<T>>(std::move(data)); + } + } +} diff --git a/ydb/library/yql/public/purecalc/helpers/stream/ya.make b/ydb/library/yql/public/purecalc/helpers/stream/ya.make new file mode 100644 index 0000000000..c96f93b582 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/stream/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +SRCS( + stream_from_vector.cpp +) + +PEERDIR( + ydb/library/yql/public/purecalc/common +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/library/yql/public/purecalc/helpers/ya.make b/ydb/library/yql/public/purecalc/helpers/ya.make new file mode 100644 index 0000000000..b228b159d9 --- /dev/null +++ b/ydb/library/yql/public/purecalc/helpers/ya.make @@ -0,0 +1,8 @@ +LIBRARY() + +PEERDIR( + ydb/library/yql/public/purecalc/helpers/protobuf + ydb/library/yql/public/purecalc/helpers/stream +) + +END() diff --git a/ydb/library/yql/public/purecalc/io_specs/CMakeLists.txt b/ydb/library/yql/public/purecalc/io_specs/CMakeLists.txt new file mode 100644 index 0000000000..6ce928c0a6 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/CMakeLists.txt @@ -0,0 +1,11 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(mkql) +add_subdirectory(protobuf) +add_subdirectory(protobuf_raw) diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..a94716ee3e --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,27 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(ut) + +add_library(purecalc-io_specs-mkql) +target_compile_options(purecalc-io_specs-mkql PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-io_specs-mkql PUBLIC + contrib-libs-cxxsupp + yutil + public-purecalc-common + providers-yt-codec + providers-yt-common + yt-lib-mkql_helpers + providers-common-codec + common-schema-mkql +) +target_sources(purecalc-io_specs-mkql PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp +) diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..2c6e3a6bee --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.linux-aarch64.txt @@ -0,0 +1,28 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. 
Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(ut) + +add_library(purecalc-io_specs-mkql) +target_compile_options(purecalc-io_specs-mkql PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-io_specs-mkql PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + public-purecalc-common + providers-yt-codec + providers-yt-common + yt-lib-mkql_helpers + providers-common-codec + common-schema-mkql +) +target_sources(purecalc-io_specs-mkql PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp +) diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..2c6e3a6bee --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.linux-x86_64.txt @@ -0,0 +1,28 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(ut) + +add_library(purecalc-io_specs-mkql) +target_compile_options(purecalc-io_specs-mkql PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-io_specs-mkql PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + public-purecalc-common + providers-yt-codec + providers-yt-common + yt-lib-mkql_helpers + providers-common-codec + common-schema-mkql +) +target_sources(purecalc-io_specs-mkql PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp +) diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..a94716ee3e --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/CMakeLists.windows-x86_64.txt @@ -0,0 +1,27 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(ut) + +add_library(purecalc-io_specs-mkql) +target_compile_options(purecalc-io_specs-mkql PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-io_specs-mkql PUBLIC + contrib-libs-cxxsupp + yutil + public-purecalc-common + providers-yt-codec + providers-yt-common + yt-lib-mkql_helpers + providers-common-codec + common-schema-mkql +) +target_sources(purecalc-io_specs-mkql PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp +) diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp b/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp new file mode 100644 index 0000000000..043b2ab156 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/spec.cpp @@ -0,0 +1,934 @@ +#include "spec.h" + +#include <ydb/library/yql/public/purecalc/common/names.h> +#include <ydb/library/yql/minikql/computation/mkql_custom_list.h> +#include <ydb/library/yql/providers/yt/codec/yt_codec_io.h> +#include <ydb/library/yql/providers/yt/lib/mkql_helpers/mkql_helpers.h> +#include <ydb/library/yql/providers/yt/common/yql_names.h> +#include <ydb/library/yql/providers/common/codec/yql_codec_type_flags.h> +#include <ydb/library/yql/providers/common/schema/mkql/yql_mkql_schema.h> +#include <ydb/library/yql/minikql/mkql_node_cast.h> + +#include <library/cpp/yson/node/node_io.h> + +#include <util/generic/noncopyable.h> +#include <util/generic/ptr.h> + + +namespace { + const TStringBuf PathColumnShortName = "path"; + + template <typename T> + inline TVector<THolder<T>> VectorFromHolder(THolder<T> holder) { + TVector<THolder<T>> result; + result.push_back(std::move(holder)); + return result; + } + + template <typename TRowType> + NYT::TNode ComposeRowSpec(const TRowType* rowType, ui64 nativeYtTypeFlags, bool strictSchema) { + constexpr bool isNodeType = std::is_same_v<TRowType, NYT::TNode>; + + static_assert(isNodeType || std::is_same_v<TRowType, NKikimr::NMiniKQL::TType>); + + auto typeNode = 
NYT::TNode::CreateMap(); + if constexpr (isNodeType) { + typeNode[NYql::RowSpecAttrType] = *rowType; + } else { + typeNode[NYql::RowSpecAttrType] = NYql::NCommon::TypeToYsonNode(rowType); + } + typeNode[NYql::RowSpecAttrNativeYtTypeFlags] = nativeYtTypeFlags; + typeNode[NYql::RowSpecAttrStrictSchema] = strictSchema; + + auto attrNode = NYT::TNode::CreateMap(); + attrNode[NYql::YqlRowSpecAttribute] = std::move(typeNode); + + return attrNode; + } + + struct TInputDescription { + public: + ui32 InputIndex; + const TMaybe<TVector<TString>>& TableNames; + const NYT::TNode& InputSchema; + const bool UseOriginalRowSpec; + + public: + template <bool UseSkiff> + TInputDescription(const NYql::NPureCalc::TMkqlInputSpec<UseSkiff>& spec, ui32 inputIndex) + : InputIndex(inputIndex) + , TableNames(spec.GetTableNames(InputIndex)) + , InputSchema(spec.GetSchemas().at(inputIndex)) + , UseOriginalRowSpec(spec.UseOriginalRowSpec()) + { + } + + bool UseSystemColumns() const { + return TableNames.Defined(); + } + + size_t GetTablesNumber() const { + if (TableNames.Defined()) { + return TableNames->size(); + } + + return 1; + } + }; + + NYT::TNode ComposeYqlAttributesFromSchema( + const NKikimr::NMiniKQL::TType* type, + ui64 nativeYtTypeFlags, + bool strictSchema, + const TInputDescription* inputDescription = nullptr) + { + auto attrs = NYT::TNode::CreateMap(); + NYT::TNode& tables = attrs[NYql::YqlIOSpecTables]; + + switch (type->GetKind()) { + case NKikimr::NMiniKQL::TType::EKind::Variant: + { + YQL_ENSURE(!inputDescription); + + const auto* vtype = AS_TYPE(NKikimr::NMiniKQL::TVariantType, type); + + NYT::TNode& registryNode = attrs[NYql::YqlIOSpecRegistry]; + THashMap<TString, TString> uniqSpecs; + + for (ui32 i = 0; i < vtype->GetAlternativesCount(); i++) { + TString refName = TStringBuilder() << "$table" << uniqSpecs.size(); + + auto rowSpec = ComposeRowSpec(vtype->GetAlternativeType(i), nativeYtTypeFlags, strictSchema); + + auto res = 
uniqSpecs.emplace(NYT::NodeToCanonicalYsonString(rowSpec), refName); + if (res.second) { + registryNode[refName] = rowSpec; + } else { + refName = res.first->second; + } + tables.Add(refName); + } + break; + } + case NKikimr::NMiniKQL::TType::EKind::Struct: + { + auto rowSpec = NYT::TNode(); + + if (inputDescription && inputDescription->UseOriginalRowSpec) { + rowSpec = ComposeRowSpec(&inputDescription->InputSchema, nativeYtTypeFlags, strictSchema); + } else { + rowSpec = ComposeRowSpec(type, nativeYtTypeFlags, strictSchema); + } + + if (inputDescription && inputDescription->UseSystemColumns()) { + rowSpec[NYql::YqlSysColumnPrefix] = NYT::TNode().Add(PathColumnShortName); + } + + if (inputDescription && inputDescription->GetTablesNumber() > 1) { + TStringBuf refName = "$table0"; + attrs[NYql::YqlIOSpecRegistry][refName] = std::move(rowSpec); + for (ui32 i = 0; i < inputDescription->GetTablesNumber(); ++i) { + tables.Add(refName); + } + } else { + tables.Add(std::move(rowSpec)); + } + break; + } + default: + Y_UNREACHABLE(); + } + + return attrs; + } + + NYql::NCommon::TCodecContext MakeCodecCtx(NYql::NPureCalc::IWorker* worker) { + return NYql::NCommon::TCodecContext( + worker->GetTypeEnvironment(), + worker->GetFunctionRegistry(), + &worker->GetGraph().GetHolderFactory() + ); + } + + NYql::TMkqlIOSpecs GetIOSpecs( + NYql::NPureCalc::IWorker* worker, + NYql::NCommon::TCodecContext& codecCtx, + bool useSkiff, + const TInputDescription* inputDescription = nullptr, + bool strictSchema = true + ) { + NYql::TMkqlIOSpecs specs; + if (useSkiff) { + specs.SetUseSkiff(worker->GetLLVMSettings()); + } + + if (inputDescription) { + const auto* type = worker->GetInputType(inputDescription->InputIndex, true); + const auto* fullType = worker->GetInputType(inputDescription->InputIndex, false); + + YQL_ENSURE(!type->FindMemberIndex(NYql::YqlSysColumnPath)); + + size_t extraColumnsCount = 0; + if (inputDescription->UseSystemColumns()) { + 
YQL_ENSURE(fullType->FindMemberIndex(NYql::YqlSysColumnPath)); + ++extraColumnsCount; + } + if (!strictSchema) { + YQL_ENSURE(fullType->FindMemberIndex(NYql::YqlOthersColumnName)); + ++extraColumnsCount; + } + + if (extraColumnsCount != 0) { + YQL_ENSURE(fullType->GetMembersCount() == type->GetMembersCount() + extraColumnsCount); + } else { + YQL_ENSURE(type == fullType); + } + + auto attrs = ComposeYqlAttributesFromSchema(type, worker->GetNativeYtTypeFlags(), strictSchema, inputDescription); + if (inputDescription->TableNames) { + specs.Init(codecCtx, attrs, inputDescription->TableNames.GetRef(), {}); + } else { + specs.Init(codecCtx, attrs, {}, {}); + } + } else { + auto attrs = ComposeYqlAttributesFromSchema(worker->GetOutputType(), worker->GetNativeYtTypeFlags(), strictSchema); + specs.Init(codecCtx, attrs); + } + + return specs; + } + + class TRawTableReaderImpl final: public NYT::TRawTableReader { + private: + // If we own Underlying_, then Owned_ == Underlying_, otherwise Owned_ is nullptr.
+ THolder<IInputStream> Owned_; + IInputStream* Underlying_; + NKikimr::NMiniKQL::TScopedAlloc& ScopedAlloc_; + + private: + TRawTableReaderImpl( + IInputStream* underlying, + THolder<IInputStream> owned, + NKikimr::NMiniKQL::TScopedAlloc& scopedAlloc + ) + : Owned_(std::move(owned)) + , Underlying_(underlying) + , ScopedAlloc_(scopedAlloc) + { + } + + public: + TRawTableReaderImpl(THolder<IInputStream> stream, NKikimr::NMiniKQL::TScopedAlloc& scopedAlloc) + : TRawTableReaderImpl(stream.Get(), nullptr, scopedAlloc) + { + Owned_ = std::move(stream); + } + + TRawTableReaderImpl(IInputStream* stream, NKikimr::NMiniKQL::TScopedAlloc& scopedAlloc) + : TRawTableReaderImpl(stream, nullptr, scopedAlloc) + { + } + + bool Retry(const TMaybe<ui32>&, const TMaybe<ui64>&) override { + return false; + } + + void ResetRetries() override { + } + + bool HasRangeIndices() const override { + return false; + } + + protected: + size_t DoRead(void* buf, size_t len) override { + auto unguard = Unguard(ScopedAlloc_); + return Underlying_->Read(buf, len); + } + }; + + + class TMkqlListValue: public NKikimr::NMiniKQL::TCustomListValue { + private: + mutable bool HasIterator_ = false; + NYql::NPureCalc::IWorker* Worker_; + // Keeps struct members reorders + NYql::NCommon::TCodecContext CodecCtx_; + NYql::TMkqlIOSpecs IOSpecs_; + // If we own Underlying_, then Owned_ == Underlying_, otherwise Owned_ is nullptr.
+ THolder<NYT::TRawTableReader> Owned_; + NYT::TRawTableReader* Underlying_; + NYql::TMkqlReaderImpl Reader_; + + private: + TMkqlListValue( + NKikimr::NMiniKQL::TMemoryUsageInfo* memInfo, + bool useSkiff, + NYT::TRawTableReader* underlying, + THolder<NYT::TRawTableReader> owned, + NYql::NPureCalc::IWorker* worker, + const TInputDescription& inputDescription, + bool ignoreStreamTableIndex = false, + bool strictSchema = true + ) : TCustomListValue(memInfo) + , Worker_(worker) + , CodecCtx_(MakeCodecCtx(Worker_)) + , IOSpecs_(GetIOSpecs(Worker_, CodecCtx_, useSkiff, &inputDescription, strictSchema)) + , Owned_(std::move(owned)) + , Underlying_(underlying) + , Reader_(*Underlying_, 0, 1ul << 20, 0, ignoreStreamTableIndex) + { + Reader_.SetSpecs(IOSpecs_, Worker_->GetGraph().GetHolderFactory()); + Reader_.Next(); + } + + public: + TMkqlListValue( + NKikimr::NMiniKQL::TMemoryUsageInfo* memInfo, + bool useSkiff, + THolder<NYT::TRawTableReader> stream, + NYql::NPureCalc::IWorker* worker, + const TInputDescription& inputDescription, + bool ignoreStreamTableIndex = false, + bool strictSchema = true + ) + : TMkqlListValue( + memInfo, useSkiff, stream.Get(), nullptr, worker, inputDescription, ignoreStreamTableIndex, strictSchema) + { + Owned_ = std::move(stream); + } + + TMkqlListValue( + NKikimr::NMiniKQL::TMemoryUsageInfo* memInfo, + bool useSkiff, + NYT::TRawTableReader* stream, + NYql::NPureCalc::IWorker* worker, + const TInputDescription& inputDescription, + bool ignoreStreamTableIndex, + bool strictSchema = true + ) + : TMkqlListValue(memInfo, useSkiff, stream, nullptr, worker, inputDescription, ignoreStreamTableIndex, strictSchema) + { + } + + NKikimr::NUdf::TUnboxedValue GetListIterator() const override { + YQL_ENSURE(!HasIterator_, "Only one pass over input is supported"); + HasIterator_ = true; + return NKikimr::NUdf::TUnboxedValuePod(const_cast<TMkqlListValue*>(this)); + } + + bool Next(NKikimr::NUdf::TUnboxedValue& result) override { + if (!Reader_.IsValid()) { + 
return false; + } + + result = Reader_.GetRow(); + Reader_.Next(); + + return true; + } + + NKikimr::NUdf::EFetchStatus Fetch( + NKikimr::NUdf::TUnboxedValue& result + ) override { + if (Next(result)) { + return NKikimr::NUdf::EFetchStatus::Ok; + } + + return NKikimr::NUdf::EFetchStatus::Finish; + } + }; + + class TMkqlWriter: public NYql::NPureCalc::THandle { + protected: + virtual const NYql::NPureCalc::IWorker* GetWorker() const = 0; + virtual void DoRun(const TVector<IOutputStream*>& stream) = 0; + + public: + void Run(IOutputStream* stream) final { + Y_ENSURE( + GetWorker()->GetOutputType()->IsStruct(), + "NYql::NPureCalc::THandle::Run(IOutputStream*) cannot be used with multi-output programs; " + "use other overloads of Run() instead."); + + DoRun({stream}); + } + + void Run(const TVector<IOutputStream*>& streams) final { + Y_ENSURE( + GetWorker()->GetOutputType()->IsVariant(), + "NYql::NPureCalc::THandle::Run(TVector<IOutputStream*>) cannot be used with single-output programs; " + "use NYql::NPureCalc::THandle::Run(IOutputStream*) instead."); + + const auto* variantType = AS_TYPE(NKikimr::NMiniKQL::TVariantType, GetWorker()->GetOutputType()); + + Y_ENSURE( + variantType->GetUnderlyingType()->IsTuple(), + "NYql::NPureCalc::THandle::Run(TVector<IOutputStream*>) cannot be used to process variants over struct; " + "use NYql::NPureCalc::THandle::Run(TMap<TString, IOutputStream*>) instead."); + + const auto* tupleType = AS_TYPE(NKikimr::NMiniKQL::TTupleType, variantType->GetUnderlyingType()); + + Y_ENSURE( + tupleType->GetElementsCount() == streams.size(), + "Number of variant alternatives should match number of streams."); + + DoRun(streams); + } + + void Run(const TMap<TString, IOutputStream*>& streams) final { + Y_ENSURE( + GetWorker()->GetOutputType()->IsVariant(), + "NYql::NPureCalc::THandle::Run(TMap<TString, IOutputStream*>) cannot be used with single-output programs; " + "use NYql::NPureCalc::THandle::Run(IOutputStream*) instead."); + + const auto* 
variantType = AS_TYPE(NKikimr::NMiniKQL::TVariantType, GetWorker()->GetOutputType()); + + Y_ENSURE( + variantType->GetUnderlyingType()->IsStruct(), + "NYql::NPureCalc::THandle::Run(TMap<TString, IOutputStream*>) cannot be used to process variants over tuple; " + "use NYql::NPureCalc::THandle::Run(TVector<IOutputStream*>) instead."); + + const auto* structType = AS_TYPE(NKikimr::NMiniKQL::TStructType, variantType->GetUnderlyingType()); + + Y_ENSURE( + structType->GetMembersCount() == streams.size(), + "Number of variant alternatives should match number of streams."); + + TVector<IOutputStream*> sortedStreams; + sortedStreams.reserve(structType->GetMembersCount()); + + for (ui32 i = 0; i < structType->GetMembersCount(); i++) { + auto name = TString{structType->GetMemberName(i)}; + Y_ENSURE(streams.contains(name), "Cannot find stream for alternative " << name.Quote()); + sortedStreams.push_back(streams.at(name)); + } + + DoRun(sortedStreams); + } + }; + + class TPullListMkqlWriter: public TMkqlWriter { + private: + NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullListWorker> Worker_; + NYql::NCommon::TCodecContext CodecCtx_; + NYql::TMkqlIOSpecs IOSpecs_; + + public: + TPullListMkqlWriter( + NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullListWorker> worker, + bool useSkiff + ) + : Worker_(std::move(worker)) + , CodecCtx_(MakeCodecCtx(Worker_.Get())) + , IOSpecs_(GetIOSpecs(Worker_.Get(), CodecCtx_, useSkiff)) + { + } + + protected: + const NYql::NPureCalc::IWorker* GetWorker() const override { + return Worker_.Get(); + } + + void DoRun(const TVector<IOutputStream*>& outputs) override { + NKikimr::NMiniKQL::TBindTerminator bind(Worker_->GetGraph().GetTerminator()); + + with_lock(Worker_->GetScopedAlloc()) { + NYql::TMkqlWriterImpl writer{outputs, 0, 1ul << 20}; + writer.SetSpecs(IOSpecs_); + + const auto outputIterator = Worker_->GetOutputIterator(); + + for (NKikimr::NUdf::TUnboxedValue value; outputIterator.Next(value); writer.AddRow(value)) + continue; + + 
writer.Finish(); + } + } + }; + + class TPullStreamMkqlWriter: public TMkqlWriter { + private: + NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullStreamWorker> Worker_; + NYql::NCommon::TCodecContext CodecCtx_; + NYql::TMkqlIOSpecs IOSpecs_; + + public: + TPullStreamMkqlWriter( + NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullStreamWorker> worker, + bool useSkiff + ) + : Worker_(std::move(worker)) + , CodecCtx_(MakeCodecCtx(Worker_.Get())) + , IOSpecs_(GetIOSpecs(Worker_.Get(), CodecCtx_, useSkiff)) + { + } + + protected: + const NYql::NPureCalc::IWorker* GetWorker() const override { + return Worker_.Get(); + } + + void DoRun(const TVector<IOutputStream*>& outputs) override { + NKikimr::NMiniKQL::TBindTerminator bind(Worker_->GetGraph().GetTerminator()); + + with_lock(Worker_->GetScopedAlloc()) { + NYql::TMkqlWriterImpl writer{outputs, 0, 1ul << 20}; + writer.SetSpecs(IOSpecs_); + + const auto output = Worker_->GetOutput(); + + for (NKikimr::NUdf::TUnboxedValue value;;) { + const auto status = output.Fetch(value); + + if (status == NKikimr::NUdf::EFetchStatus::Ok) { + writer.AddRow(value); + } else if (status == NKikimr::NUdf::EFetchStatus::Finish) { + break; + } else { + YQL_ENSURE(false, "Yield is not supported in pull mode"); + } + } + + writer.Finish(); + } + } + }; +} + +namespace NYql { + namespace NPureCalc { + template <bool UseSkiff> + TMkqlInputSpec<UseSkiff>::TMkqlInputSpec(TVector<NYT::TNode> schemas) + : Schemas_(std::move(schemas)) + { + AllTableNames_ = TVector<TMaybe<TVector<TString>>>(Schemas_.size(), Nothing()); + this->AllVirtualColumns_ = TVector<THashMap<TString, NYT::TNode>>(Schemas_.size()); + } + + template <bool UseSkiff> + TMkqlInputSpec<UseSkiff>::TMkqlInputSpec(NYT::TNode schema, bool ignoreStreamTableIndex) + { + Schemas_.push_back(std::move(schema)); + IgnoreStreamTableIndex_ = ignoreStreamTableIndex; + AllTableNames_.push_back(Nothing()); + this->AllVirtualColumns_.push_back({}); + } + + template <bool UseSkiff> + const 
TVector<NYT::TNode>& TMkqlInputSpec<UseSkiff>::GetSchemas() const { + return Schemas_; + } + + template <bool UseSkiff> + bool TMkqlInputSpec<UseSkiff>::IgnoreStreamTableIndex() const { + return IgnoreStreamTableIndex_; + } + + template <bool UseSkiff> + bool TMkqlInputSpec<UseSkiff>::IsStrictSchema() const { + return StrictSchema_; + } + + template <bool UseSkiff> + TMkqlInputSpec<UseSkiff>& TMkqlInputSpec<UseSkiff>::SetStrictSchema(bool strictSchema) { + static const NYT::TNode stringType = NYT::TNode::CreateList().Add("DataType").Add("String"); + static const NYT::TNode othersColumntype = NYT::TNode::CreateList().Add("DictType").Add(stringType).Add(stringType); + + StrictSchema_ = strictSchema; + + for (size_t index = 0; index < Schemas_.size(); ++index) { + auto& schemaVirtualColumns = this->AllVirtualColumns_.at(index); + if (StrictSchema_) { + schemaVirtualColumns.erase(NYql::YqlOthersColumnName); + } else { + schemaVirtualColumns.emplace(NYql::YqlOthersColumnName, othersColumntype); + } + } + + return *this; + } + + template <bool UseSkiff> + bool TMkqlInputSpec<UseSkiff>::UseOriginalRowSpec() const { + return UseOriginalRowSpec_; + } + + template <bool UseSkiff> + TMkqlInputSpec<UseSkiff>& TMkqlInputSpec<UseSkiff>::SetUseOriginalRowSpec(bool value) { + UseOriginalRowSpec_ = value; + + return *this; + } + + template <bool UseSkiff> + const TMaybe<TVector<TString>>& TMkqlInputSpec<UseSkiff>::GetTableNames() const { + Y_ENSURE(AllTableNames_.size() == 1, "expected single-input spec"); + + return AllTableNames_[0]; + } + + template <bool UseSkiff> + const TMaybe<TVector<TString>>& TMkqlInputSpec<UseSkiff>::GetTableNames(ui32 index) const { + Y_ENSURE(index < AllTableNames_.size(), "invalid input index"); + + return AllTableNames_[index]; + } + + template <bool UseSkiff> + TMkqlInputSpec<UseSkiff>& TMkqlInputSpec<UseSkiff>::SetTableNames(TVector<TString> tableNames) { + Y_ENSURE(AllTableNames_.size() == 1, "expected single-input spec"); + + return 
SetTableNames(std::move(tableNames), 0); + } + + template <bool UseSkiff> + TMkqlInputSpec<UseSkiff>& TMkqlInputSpec<UseSkiff>::SetTableNames(TVector<TString> tableNames, ui32 index) { + Y_ENSURE(index < AllTableNames_.size(), "invalid input index"); + + auto& value = AllTableNames_[index]; + + if (!value.Defined()) { + YQL_ENSURE(NYql::YqlSysColumnPath == NYql::NPureCalc::PurecalcSysColumnTablePath); + YQL_ENSURE(NYql::GetSysColumnTypeId(PathColumnShortName) == NYql::NUdf::TDataType<char*>::Id); + this->AllVirtualColumns_.at(index).emplace( + NYql::YqlSysColumnPath, NYT::TNode::CreateList().Add("DataType").Add("String") + ); + } + + value = std::move(tableNames); + + return *this; + } + + template <bool UseSkiff> + TMkqlOutputSpec<UseSkiff>::TMkqlOutputSpec(NYT::TNode schema) + : Schema_(std::move(schema)) + { + } + + template <bool UseSkiff> + const NYT::TNode& TMkqlOutputSpec<UseSkiff>::GetSchema() const { + return Schema_; + } + + template <bool UseSkiff> + void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullStreamWorker* worker, + const TVector<IInputStream*>& streams + ) { + YQL_ENSURE( + worker->GetInputsCount() == streams.size(), + "number of input streams should match number of inputs provided by spec"); + + TVector<THolder<NYT::TRawTableReader>> wrappers; + auto& scopedAlloc = worker->GetScopedAlloc(); + for (ui32 i = 0; i < streams.size(); ++i) { + wrappers.push_back(MakeHolder<TRawTableReaderImpl>(streams[i], scopedAlloc)); + } + + NYql::NPureCalc::TInputSpecTraits<NYql::NPureCalc::TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker( + spec, + worker, + std::move(wrappers) + ); + } + + template <bool UseSkiff> + void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullStreamWorker* worker, + IInputStream* stream + ) { + TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker( + spec, + worker, + 
TVector<IInputStream*>({stream}) + ); + } + + template <bool UseSkiff> + void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullStreamWorker* worker, + TVector<THolder<IInputStream>>&& streams + ) { + YQL_ENSURE( + worker->GetInputsCount() == streams.size(), + "number of input streams should match number of inputs provided by spec"); + + TVector<THolder<NYT::TRawTableReader>> wrappers; + auto& scopedAlloc = worker->GetScopedAlloc(); + for (ui32 i = 0; i < streams.size(); ++i) { + wrappers.push_back(MakeHolder<TRawTableReaderImpl>(std::move(streams[i]), scopedAlloc)); + } + + TInputSpecTraits<NYql::NPureCalc::TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker( + spec, + worker, + std::move(wrappers) + ); + } + + template <bool UseSkiff> + void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullStreamWorker* worker, + THolder<IInputStream> stream + ) { + TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker( + spec, + worker, + VectorFromHolder<IInputStream>(std::move(stream)) + ); + } + + template <bool UseSkiff> + void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullStreamWorker* worker, + const TVector<NYT::TRawTableReader*>& streams + ) { + YQL_ENSURE( + worker->GetInputsCount() == streams.size(), + "number of input streams should match number of inputs provided by spec"); + + with_lock(worker->GetScopedAlloc()) { + auto& holderFactory = worker->GetGraph().GetHolderFactory(); + for (ui32 i = 0; i < streams.size(); ++i) { + TInputDescription inputDescription(spec, i); + auto input = holderFactory.Create<TMkqlListValue>( + UseSkiff, streams[i], worker, inputDescription, spec.IgnoreStreamTableIndex(), spec.IsStrictSchema() + ); + worker->SetInput(std::move(input), i); + } + } + } + + template <bool UseSkiff> + void 
TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullStreamWorker* worker, + NYT::TRawTableReader* stream + ) { + TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker( + spec, + worker, + TVector<NYT::TRawTableReader*>({stream}) + ); + } + + template <bool UseSkiff> + void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullStreamWorker* worker, + TVector<THolder<NYT::TRawTableReader>>&& streams + ) { + YQL_ENSURE( + worker->GetInputsCount() == streams.size(), + "number of input streams should match number of inputs provided by spec"); + + with_lock(worker->GetScopedAlloc()) { + auto& holderFactory = worker->GetGraph().GetHolderFactory(); + for (ui32 i = 0; i < streams.size(); ++i) { + TInputDescription inputDescription(spec, i); + auto input = holderFactory.Create<TMkqlListValue>( + UseSkiff, std::move(streams[i]), worker, inputDescription, spec.IgnoreStreamTableIndex(), spec.IsStrictSchema() + ); + worker->SetInput(std::move(input), i); + } + } + } + + template <bool UseSkiff> + void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullStreamWorker* worker, + THolder<NYT::TRawTableReader> stream + ) { + TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullStreamWorker( + spec, + worker, + VectorFromHolder<NYT::TRawTableReader>(std::move(stream)) + ); + } + + template <bool UseSkiff> + void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullListWorker* worker, + const TVector<IInputStream*>& streams + ) { + YQL_ENSURE( + worker->GetInputsCount() == streams.size(), + "number of input streams should match number of inputs provided by spec"); + + TVector<THolder<NYT::TRawTableReader>> wrappers; + auto& scopedAlloc = worker->GetScopedAlloc(); + for (ui32 i = 0; i < streams.size(); ++i) { + 
wrappers.push_back(MakeHolder<TRawTableReaderImpl>(streams[i], scopedAlloc)); + } + + NYql::NPureCalc::TInputSpecTraits<NYql::NPureCalc::TMkqlInputSpec<UseSkiff>>::PreparePullListWorker( + spec, + worker, + std::move(wrappers) + ); + } + + template <bool UseSkiff> + void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullListWorker* worker, + IInputStream* stream + ) { + TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker( + spec, + worker, + TVector<IInputStream*>({stream}) + ); + } + + template <bool UseSkiff> + void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullListWorker* worker, + TVector<THolder<IInputStream>>&& streams + ) { + YQL_ENSURE( + worker->GetInputsCount() == streams.size(), + "number of input streams should match number of inputs provided by spec"); + + TVector<THolder<NYT::TRawTableReader>> wrappers; + auto& scopedAlloc = worker->GetScopedAlloc(); + for (ui32 i = 0; i < streams.size(); ++i) { + wrappers.push_back(MakeHolder<TRawTableReaderImpl>(std::move(streams[i]), scopedAlloc)); + } + + NYql::NPureCalc::TInputSpecTraits<NYql::NPureCalc::TMkqlInputSpec<UseSkiff>>::PreparePullListWorker( + spec, + worker, + std::move(wrappers) + ); + } + + template <bool UseSkiff> + void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullListWorker* worker, + THolder<IInputStream> stream + ) { + TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker( + spec, + worker, + VectorFromHolder<IInputStream>(std::move(stream)) + ); + } + + template <bool UseSkiff> + void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullListWorker* worker, + const TVector<NYT::TRawTableReader*>& streams + ) { + YQL_ENSURE( + worker->GetInputsCount() == streams.size(), + "number of input streams should match 
number of inputs provided by spec"); + + with_lock(worker->GetScopedAlloc()) { + auto& holderFactory = worker->GetGraph().GetHolderFactory(); + for (ui32 i = 0; i < streams.size(); ++i) { + TInputDescription inputDescription(spec, i); + auto input = holderFactory.Create<TMkqlListValue>( + UseSkiff, streams[i], worker, inputDescription, spec.IgnoreStreamTableIndex(), spec.IsStrictSchema() + ); + worker->SetInput(std::move(input), i); + } + } + } + + template <bool UseSkiff> + void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullListWorker* worker, + NYT::TRawTableReader* stream + ) { + TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker( + spec, + worker, + TVector<NYT::TRawTableReader*>({stream}) + ); + } + + template <bool UseSkiff> + void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullListWorker* worker, + TVector<THolder<NYT::TRawTableReader>>&& streams + ) { + YQL_ENSURE( + worker->GetInputsCount() == streams.size(), + "number of input streams should match number of inputs provided by spec"); + + with_lock(worker->GetScopedAlloc()) { + auto& holderFactory = worker->GetGraph().GetHolderFactory(); + for (ui32 i = 0; i < streams.size(); ++i) { + TInputDescription inputDescription(spec, i); + auto input = holderFactory.Create<TMkqlListValue>( + UseSkiff, std::move(streams[i]), worker, inputDescription, spec.IgnoreStreamTableIndex(), spec.IsStrictSchema() + ); + worker->SetInput(std::move(input), i); + } + } + } + + template <bool UseSkiff> + void TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, + IPullListWorker* worker, + THolder<NYT::TRawTableReader> stream + ) { + TInputSpecTraits<TMkqlInputSpec<UseSkiff>>::PreparePullListWorker( + spec, + worker, + VectorFromHolder<NYT::TRawTableReader>(std::move(stream)) + ); + } + + template <bool UseSkiff> + 
THolder<THandle> TOutputSpecTraits<TMkqlOutputSpec<UseSkiff>>::ConvertPullListWorkerToOutputType( + const NYql::NPureCalc::TMkqlOutputSpec<UseSkiff>&, + NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullListWorker> worker + ) { + with_lock(worker->GetScopedAlloc()) { + return MakeHolder<TPullListMkqlWriter>(std::move(worker), UseSkiff); + } + } + + template <bool UseSkiff> + THolder<THandle> TOutputSpecTraits<TMkqlOutputSpec<UseSkiff>>::ConvertPullStreamWorkerToOutputType( + const NYql::NPureCalc::TMkqlOutputSpec<UseSkiff>&, + NYql::NPureCalc::TWorkerHolder<NYql::NPureCalc::IPullStreamWorker> worker + ) { + with_lock(worker->GetScopedAlloc()) { + return MakeHolder<TPullStreamMkqlWriter>(std::move(worker), UseSkiff); + } + } + + template class TMkqlSpec<true, TInputSpecBase>; + template class TMkqlSpec<false, TInputSpecBase>; + template class TMkqlSpec<true, TOutputSpecBase>; + template class TMkqlSpec<false, TOutputSpecBase>; + + template class TMkqlInputSpec<true>; + template class TMkqlInputSpec<false>; + template class TMkqlOutputSpec<true>; + template class TMkqlOutputSpec<false>; + + template struct TInputSpecTraits<TMkqlInputSpec<true>>; + template struct TInputSpecTraits<TMkqlInputSpec<false>>; + template struct TOutputSpecTraits<TMkqlOutputSpec<true>>; + template struct TOutputSpecTraits<TMkqlOutputSpec<false>>; + } +} diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/spec.h b/ydb/library/yql/public/purecalc/io_specs/mkql/spec.h new file mode 100644 index 0000000000..ef4ceea6a2 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/spec.h @@ -0,0 +1,231 @@ +#pragma once + +#include <ydb/library/yql/public/purecalc/common/interface.h> + +#include <util/generic/noncopyable.h> + +namespace NYT { + class TRawTableReader; +} + +namespace NYql { + namespace NPureCalc { + /** + * Processing mode for working with Skiff/YSON IO. 
+ * + * In this mode purecalc accepts a vector of pointers to `IInputStream` as inputs and returns a handle + * which can be used to invoke the program, writing all output to a stream. + * + * For example: + * + * @code + * auto handle = program.Apply(&Cin); + * handle->Run(&Cout); // run the program, read from Cin and write to Cout + * @endcode + * + * All working modes except PushStream are supported. + */ + template <bool UseSkiff, typename TBase> + class TMkqlSpec: public TBase { + static_assert( + std::is_same<TBase, TInputSpecBase>::value || + std::is_same<TBase, TOutputSpecBase>::value, + "Class is used in unintended way!" + ); + }; + + /** + * Skiff/YSON input spec. In this mode purecalc takes non-owning pointers to text input streams and parses + * them using Skiff or YSON codec. + * + * The program synopsis follows: + * + * @code + * ... TPullStreamProgram::Apply(TVector<IInputStream*>); + * ... TPullStreamProgram::Apply(TVector<NYT::TRawTableReader*>); + * ... TPullListProgram::Apply(TVector<IInputStream*>); + * ... TPullListProgram::Apply(TVector<NYT::TRawTableReader*>); + * @endcode + * + * @tparam UseSkiff expect Skiff format if true, YSON otherwise. + */ + template <bool UseSkiff> + class TMkqlInputSpec: public TMkqlSpec<UseSkiff, TInputSpecBase> { + public: + using TBase = TInputSpecBase; + static constexpr bool UseSkiffValue = UseSkiff; + + private: + TVector<NYT::TNode> Schemas_; + bool StrictSchema_ = true; + bool IgnoreStreamTableIndex_ = false; + TVector<TMaybe<TVector<TString>>> AllTableNames_; + // Allows reading structure columns with a custom member order. + // Instead of the chain TNode => TTypeAnnotationNode => TType => TNode (which loses member order), use + // original schema as row spec. 
+ bool UseOriginalRowSpec_ = false; + + public: + explicit TMkqlInputSpec(TVector<NYT::TNode>); + explicit TMkqlInputSpec(NYT::TNode, bool ignoreStreamTableIndex = false); + + const TVector<NYT::TNode>& GetSchemas() const override; + + bool IgnoreStreamTableIndex() const; + + bool IsStrictSchema() const; + TMkqlInputSpec& SetStrictSchema(bool strictSchema); + + const TMaybe<TVector<TString>>& GetTableNames() const; + const TMaybe<TVector<TString>>& GetTableNames(ui32) const; + bool UseOriginalRowSpec() const; + + TMkqlInputSpec& SetTableNames(TVector<TString>); + TMkqlInputSpec& SetTableNames(TVector<TString>, ui32); + TMkqlInputSpec& SetUseOriginalRowSpec(bool value); + }; + + /** + * Skiff/YSON output. In this mode purecalc returns a handle which can be used to invoke the underlying program. + * + * So far this is the only spec that supports multi-table output. + * + * The program synopsis follows: + * + * @code + * THolder<THandle> TPullStreamProgram::Apply(...); + * THolder<THandle> TPullListProgram::Apply(...); + * @endcode + * + * @tparam UseSkiff write output in Skiff format if true, use YSON otherwise. + */ + template <bool UseSkiff> + class TMkqlOutputSpec: public TMkqlSpec<UseSkiff, TOutputSpecBase> { + public: + using TMkqlSpec<UseSkiff, TOutputSpecBase>::TMkqlSpec; + + using TBase = TOutputSpecBase; + static constexpr bool UseSkiffValue = UseSkiff; + + private: + NYT::TNode Schema_; + + public: + explicit TMkqlOutputSpec(NYT::TNode); + + const NYT::TNode& GetSchema() const override; + }; + + /** + * A class which can invoke a purecalc program and store its output in the given output stream. + */ + class THandle: private TMoveOnly { + public: + /** + * Run the program. Read a chunk from the program's assigned input, parse it and pass it to the program. + * Then serialize the program's output and write it to the given output stream. Repeat until the input + * stream is empty. + */ + /// @{ + /** + * Overload for single-table output programs (i.e. 
output type is struct). + */ + virtual void Run(IOutputStream*) = 0; + /** + * Overload for multi-table output programs (i.e. output type is variant over tuple). + * Size of vector should match number of variant alternatives. + */ + virtual void Run(const TVector<IOutputStream*>&) = 0; + /** + * Overload for multi-table output programs (i.e. output type is variant over struct). + * Size of map should match number of variant alternatives. For every alternative there should be a stream + * in the map. + */ + virtual void Run(const TMap<TString, IOutputStream*>&) = 0; + /// @} + + virtual ~THandle() = default; + }; + + template <bool UseSkiff> + struct TInputSpecTraits<TMkqlInputSpec<UseSkiff>> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = true; + static const constexpr bool SupportPullListMode = true; + static const constexpr bool SupportPushStreamMode = false; + + static void PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, const TVector<IInputStream*>& streams); + + static void PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, TVector<THolder<IInputStream>>&& streams); + + static void PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, const TVector<NYT::TRawTableReader*>& streams); + + static void PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, TVector<THolder<NYT::TRawTableReader>>&& streams); + + static void PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, const TVector<IInputStream*>& streams); + + static void PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, TVector<THolder<IInputStream>>&& streams); + + static void PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, const TVector<NYT::TRawTableReader*>& streams); + + 
static void PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, TVector<THolder<NYT::TRawTableReader>>&& streams); + + // Members for single-input programs + + static void PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, IInputStream* stream); + + static void PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, THolder<IInputStream> stream); + + static void PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, NYT::TRawTableReader* stream); + + static void PreparePullStreamWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullStreamWorker* worker, THolder<NYT::TRawTableReader> stream); + + static void PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, IInputStream* stream); + + static void PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, THolder<IInputStream> stream); + + static void PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, NYT::TRawTableReader* stream); + + static void PreparePullListWorker( + const TMkqlInputSpec<UseSkiff>& spec, IPullListWorker* worker, THolder<NYT::TRawTableReader> stream); + }; + + template <bool UseSkiff> + struct TOutputSpecTraits<TMkqlOutputSpec<UseSkiff>> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = true; + static const constexpr bool SupportPullListMode = true; + static const constexpr bool SupportPushStreamMode = false; + + using TPullStreamReturnType = THolder<THandle>; + using TPullListReturnType = THolder<THandle>; + + static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TMkqlOutputSpec<UseSkiff>&, TWorkerHolder<IPullStreamWorker>); + + static TPullListReturnType ConvertPullListWorkerToOutputType(const TMkqlOutputSpec<UseSkiff>&, TWorkerHolder<IPullListWorker>); + }; + + using 
TSkiffInputSpec = TMkqlInputSpec<true>; + using TSkiffOutputSpec = TMkqlOutputSpec<true>; + + using TYsonInputSpec = TMkqlInputSpec<false>; + using TYsonOutputSpec = TMkqlOutputSpec<false>; + } +} diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..08edd65bf8 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,77 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_executable(ydb-library-yql-public-purecalc-io_specs-mkql-ut) +target_compile_options(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(ydb-library-yql-public-purecalc-io_specs-mkql-ut PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + udf-service-exception_policy + public-purecalc-common + purecalc-io_specs-mkql + purecalc-ut-lib +) +target_link_options(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE + -Wl,-platform_version,macos,11.0,11.0 + -fPIC + -fPIC + -framework + CoreFoundation +) +target_sources(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp +) +set_property( + TARGET + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + ydb-library-yql-public-purecalc-io_specs-mkql-ut + TEST_TARGET + ydb-library-yql-public-purecalc-io_specs-mkql-ut + TEST_ARG + 
--print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + LABELS + MEDIUM +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + PROCESSORS + 1 +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + TIMEOUT + 300 +) +target_allocator(ydb-library-yql-public-purecalc-io_specs-mkql-ut + system_allocator +) +vcs_info(ydb-library-yql-public-purecalc-io_specs-mkql-ut) diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..5027702669 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.linux-aarch64.txt @@ -0,0 +1,80 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_executable(ydb-library-yql-public-purecalc-io_specs-mkql-ut) +target_compile_options(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(ydb-library-yql-public-purecalc-io_specs-mkql-ut PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-testing-unittest_main + udf-service-exception_policy + public-purecalc-common + purecalc-io_specs-mkql + purecalc-ut-lib +) +target_link_options(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp +) +set_property( + TARGET + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + ydb-library-yql-public-purecalc-io_specs-mkql-ut + TEST_TARGET + ydb-library-yql-public-purecalc-io_specs-mkql-ut + TEST_ARG + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + LABELS + MEDIUM +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + PROCESSORS + 1 +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + TIMEOUT + 300 +) +target_allocator(ydb-library-yql-public-purecalc-io_specs-mkql-ut + cpp-malloc-jemalloc +) +vcs_info(ydb-library-yql-public-purecalc-io_specs-mkql-ut) diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..36c85aeacb --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.linux-x86_64.txt @@ -0,0 +1,82 @@ + +# This file was generated by the build 
system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_executable(ydb-library-yql-public-purecalc-io_specs-mkql-ut) +target_compile_options(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(ydb-library-yql-public-purecalc-io_specs-mkql-ut PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + udf-service-exception_policy + public-purecalc-common + purecalc-io_specs-mkql + purecalc-ut-lib +) +target_link_options(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp +) +set_property( + TARGET + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + ydb-library-yql-public-purecalc-io_specs-mkql-ut + TEST_TARGET + ydb-library-yql-public-purecalc-io_specs-mkql-ut + TEST_ARG + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + LABELS + MEDIUM +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + PROCESSORS + 1 +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + TIMEOUT + 300 +) +target_allocator(ydb-library-yql-public-purecalc-io_specs-mkql-ut + cpp-malloc-tcmalloc + libs-tcmalloc-no_percpu_cache +) 
+vcs_info(ydb-library-yql-public-purecalc-io_specs-mkql-ut) diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..be8ef1154c --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/CMakeLists.windows-x86_64.txt @@ -0,0 +1,70 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. 
Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_executable(ydb-library-yql-public-purecalc-io_specs-mkql-ut) +target_compile_options(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(ydb-library-yql-public-purecalc-io_specs-mkql-ut PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + udf-service-exception_policy + public-purecalc-common + purecalc-io_specs-mkql + purecalc-ut-lib +) +target_sources(ydb-library-yql-public-purecalc-io_specs-mkql-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp +) +set_property( + TARGET + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + ydb-library-yql-public-purecalc-io_specs-mkql-ut + TEST_TARGET + ydb-library-yql-public-purecalc-io_specs-mkql-ut + TEST_ARG + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + LABELS + MEDIUM +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + PROCESSORS + 1 +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-io_specs-mkql-ut + PROPERTY + TIMEOUT + 300 +) +target_allocator(ydb-library-yql-public-purecalc-io_specs-mkql-ut + system_allocator +) +vcs_info(ydb-library-yql-public-purecalc-io_specs-mkql-ut) diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test.inl b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test.inl new file mode 100644 index 0000000000..e148bb2bab --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test.inl @@ -0,0 +1,777 @@ +Y_UNIT_TEST_SUITE(TEST_SUITE_NAME) { + using NYql::NPureCalc::NPrivate::GetSchema; + + Y_UNIT_TEST(TestAllTypes) { + using namespace NYql::NPureCalc; + + 
TVector<TString> fields {"int64", "uint64", "double", "bool", "string", "yson"}; + auto schema = GetSchema(fields); + auto stream = GET_STREAM(fields); + + auto factory = MakeProgramFactory(); + + { + auto program = CREATE_PROGRAM( + INPUT_SPEC {schema}, + OUTPUT_SPEC {schema}, + "SELECT * FROM Input", + ETranslationMode::SQL, 1 + ); + + auto input = TStringStream(stream); + auto handle = program->Apply(&input); + TStringStream output; + handle->Run(&output); + + ASSERT_EQUAL_STREAMS(stream, output); + } + + // invalid table prefix + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + CREATE_PROGRAM( + INPUT_SPEC {schema}, + OUTPUT_SPEC {schema}, + "SELECT * FROM Table", + ETranslationMode::SQL, 1 + ); + }(), TCompileError, "Failed to optimize"); + + // invalid table suffix (input index) + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + CREATE_PROGRAM( + INPUT_SPEC {schema}, + OUTPUT_SPEC {schema}, + "SELECT * FROM Input1", + ETranslationMode::SQL, 1 + ); + }(), TCompileError, "Failed to optimize"); + } + + Y_UNIT_TEST(TestColumnsFilter) { + using namespace NYql::NPureCalc; + + TVector<TString> fields {"int64", "uint64", "double", "bool", "string", "yson"}; + auto schema = GetSchema(fields); + auto stream = GET_STREAM(fields); + + TVector<TString> someFields {"int64", "bool", "string"}; + auto someSchema = GetSchema(someFields); + auto someStream = GET_STREAM(someFields); + + auto factory = MakeProgramFactory(); + + { + auto inputSpec = INPUT_SPEC {schema}; + auto outputSpec = OUTPUT_SPEC {someSchema}; + + auto program = CREATE_PROGRAM( + inputSpec, + outputSpec, + "SELECT `int64`, `bool`, `string` FROM Input", + ETranslationMode::SQL, 1 + ); + + UNIT_ASSERT_VALUES_EQUAL( + program->GetUsedColumns(), + THashSet<TString>(someFields.begin(), someFields.end()) + ); + + UNIT_ASSERT_VALUES_EQUAL( + program->GetUsedColumns(0), + program->GetUsedColumns() + ); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto unused = program->GetUsedColumns(1); + }()), yexception, "invalid input index (1) 
in GetUsedColumns call"); + + auto input = TStringStream(stream); + auto handle = program->Apply(&input); + TStringStream output; + handle->Run(&output); + + ASSERT_EQUAL_STREAMS(someStream, output); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto outputs = TVector<IOutputStream*>({}); + program->Apply(&input)->Run(outputs); + }()), yexception, "cannot be used with single-output programs"); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto outputs = TVector<IOutputStream*>({&output}); + program->Apply(&input)->Run(outputs); + }()), yexception, "cannot be used with single-output programs"); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto outputs = TMap<TString, IOutputStream*>(); + program->Apply(&input)->Run(outputs); + }()), yexception, "cannot be used with single-output programs"); + } + } + +#ifdef PULL_LIST_MODE + Y_UNIT_TEST(TestColumnsFilterMultiInput) { + using namespace NYql::NPureCalc; + + TVector<TString> fields0 {"int64", "uint64", "double"}; + auto schema0 = GetSchema(fields0); + TVector<TString> someFields0 {"int64", "uint64"}; + + TVector<TString> fields1 {"bool", "string", "yson"}; + auto schema1 = GetSchema(fields1); + TVector<TString> someFields1 {"bool", "yson"}; + + TVector<TString> unitedFields {"int64", "uint64", "bool", "yson"}; + auto unitedSchema = GetSchema(unitedFields, unitedFields); + + auto factory = MakeProgramFactory(); + + { + auto inputSpec = INPUT_SPEC {{schema0, schema1}}; + auto outputSpec = OUTPUT_SPEC {unitedSchema}; + + auto program = CREATE_PROGRAM( + inputSpec, + outputSpec, + R"( +SELECT `int64`, `uint64` FROM Input0 +UNION ALL +SELECT `bool`, `yson` FROM Input1 + )", + ETranslationMode::SQL, 1 + ); + + UNIT_ASSERT_VALUES_EQUAL( + program->GetUsedColumns(0), + THashSet<TString>(someFields0.begin(), someFields0.end()) + ); + + UNIT_ASSERT_VALUES_EQUAL( + program->GetUsedColumns(1), + THashSet<TString>(someFields1.begin(), someFields1.end()) + ); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto unused = 
program->GetUsedColumns(); + }()), yexception, "GetUsedColumns() can be used only with single-input programs"); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto unused = program->GetUsedColumns(2); + }()), yexception, "invalid input index (2) in GetUsedColumns call"); + } + } +#endif + + Y_UNIT_TEST(TestColumnsFilterWithOptionalFields) { + using namespace NYql::NPureCalc; + + TVector<TString> fields {"int64", "uint64", "double", "bool", "string", "yson"}; + auto schema = GetSchema(fields); + auto stream = GET_STREAM(fields); + + TVector<TString> someFields {"int64", "bool", "string"}; + TVector<TString> someOptionalFields {"string"}; + + auto someSchema = GetSchema(someFields); + auto someStream = GET_STREAM(someFields, someOptionalFields); + auto someOptionalSchema = GetSchema(someFields, someOptionalFields); + + auto factory = MakeProgramFactory(); + + { + auto program = CREATE_PROGRAM( + INPUT_SPEC {schema}, + OUTPUT_SPEC {someOptionalSchema}, + "SELECT `int64`, `bool`, Nothing(String?) as `string` FROM Input", + ETranslationMode::SQL, 1 + ); + + UNIT_ASSERT_VALUES_EQUAL( + program->GetUsedColumns(), + THashSet<TString>({"int64", "bool"}) + ); + + UNIT_ASSERT_VALUES_EQUAL( + program->GetUsedColumns(), + program->GetUsedColumns(0) + ); + + auto input = TStringStream(stream); + auto handle = program->Apply(&input); + TStringStream output; + handle->Run(&output); + + ASSERT_EQUAL_STREAMS(someStream, output); + } + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + CREATE_PROGRAM( + INPUT_SPEC {schema}, + OUTPUT_SPEC {someSchema}, + "SELECT `int64`, `bool`, Nothing(String?) 
as `string` FROM Input", + ETranslationMode::SQL, 1 + ); + }(), TCompileError, "Failed to optimize"); + } + + Y_UNIT_TEST(TestOutputSpecInference) { + using namespace NYql::NPureCalc; + + TVector<TString> fields {"int64", "uint64", "double", "bool", "string"}; + auto schema = GetSchema(fields); + auto stream = GET_STREAM(fields); + + TVector<TString> someFields {"bool", "int64", "string"}; // Keep this sorted... + auto someSchema = GetSchema(someFields); + auto someStream = GET_STREAM(someFields); + + auto factory = MakeProgramFactory(); + + { + auto inputSpec = INPUT_SPEC {schema}; + auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()}; + + auto program = CREATE_PROGRAM( + inputSpec, + outputSpec, + "SELECT `int64`, `bool`, `string` FROM Input", + ETranslationMode::SQL, 1 + ); + + UNIT_ASSERT_EQUAL(program->MakeFullOutputSchema(), someSchema); + + UNIT_ASSERT_VALUES_EQUAL( + program->GetUsedColumns(), + THashSet<TString>(someFields.begin(), someFields.end()) + ); + + UNIT_ASSERT_VALUES_EQUAL( + program->GetUsedColumns(), + program->GetUsedColumns(0) + ); + + auto input = TStringStream(stream); + auto handle = program->Apply(&input); + TStringStream output; + handle->Run(&output); + + ASSERT_EQUAL_STREAMS(someStream, output); + } + } + +#ifdef PULL_LIST_MODE + Y_UNIT_TEST(TestJoinInputs) { + using namespace NYql::NPureCalc; + + TVector<TString> fields0 {"int64", "uint64", "double"}; + auto schema0 = GetSchema(fields0); + auto stream0 = GET_STREAM(fields0); + + TVector<TString> fields1 {"int64", "bool", "string"}; + auto schema1 = GetSchema(fields1); + auto stream1 = GET_STREAM(fields1); + + TVector<TString> joinedFields {"bool", "double", "int64", "string", "uint64"}; // keep this sorted + auto joinedSchema = GetSchema(joinedFields); + auto joinedStream = GET_STREAM(joinedFields); + + auto factory = MakeProgramFactory(); + + { + auto inputSpec = INPUT_SPEC {{schema0, schema1}}; + auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()}; + + auto program = 
CREATE_PROGRAM( + inputSpec, + outputSpec, + R"( +SELECT + t0.`int64` AS `int64`, + t0.`uint64` AS `uint64`, + t0.`double` AS `double`, + t1.`bool` AS `bool`, + t1.`string` AS `string` +FROM + Input0 AS t0 +INNER JOIN + Input1 AS t1 +ON t0.`int64` == t1.`int64` +ORDER BY `int64` + )", + ETranslationMode::SQL, 1 + ); + + UNIT_ASSERT_EQUAL(program->MakeFullOutputSchema(), joinedSchema); + + UNIT_ASSERT_VALUES_EQUAL( + program->GetUsedColumns(0), + THashSet<TString>(fields0.begin(), fields0.end()) + ); + + UNIT_ASSERT_VALUES_EQUAL( + program->GetUsedColumns(1), + THashSet<TString>(fields1.begin(), fields1.end()) + ); + + TStringStream input0(stream0); + TStringStream input1(stream1); + auto handle = program->Apply<TVector<IInputStream*>>({&input0, &input1}); + TStringStream output; + handle->Run(&output); + + ASSERT_EQUAL_STREAMS(joinedStream, output); + } + } +#endif + + Y_UNIT_TEST(TestMultiOutputOverTuple) { + using namespace NYql::NPureCalc; + + TVector<TString> fields {"int64", "uint64", "double", "bool", "string"}; + auto schema = GetSchema(fields); + auto stream = GET_STREAM(fields, {}, 0, 10, 1); + + TVector<TString> someFields1 {"bool", "int64", "string"}; + auto someSchema1 = GetSchema(someFields1); + auto someStream1 = GET_STREAM(someFields1, {}, 0, 10, 2); + + TVector<TString> someFields2 {"bool", "double"}; + auto someSchema2 = GetSchema(someFields2); + auto someStream2 = GET_STREAM(someFields2, {}, 1, 10, 2); + + auto factory = MakeProgramFactory(); + + { + auto inputSpec = INPUT_SPEC {schema}; + auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()}; + + auto program = CREATE_PROGRAM( + inputSpec, + outputSpec, + R"( +( + (let vt (ParseType '"Variant<Struct<bool:Bool, int64:Int64, string:String>, Struct<bool:Bool, double:Double>>")) + (return (Map (Self '0) (lambda '(x) (block '( + (let r1 (Variant (AsStruct '('bool (Member x 'bool)) '('int64 (Member x 'int64)) '('string (Member x 'string))) '0 vt)) + (let r2 (Variant (AsStruct '('bool (Member x 
'bool)) '('double (Member x 'double))) '1 vt)) + (return (If (Coalesce (== (% (Member x 'int64) (Int64 '2)) (Int64 '0)) (Bool 'false)) r1 r2)) + ))))) +) + )", + ETranslationMode::SExpr + ); + + auto input = TStringStream(stream); + auto handle = program->Apply(&input); + TStringStream output1, output2; + auto outputs = TVector<IOutputStream*>({&output1, &output2}); + handle->Run(outputs); + ASSERT_EQUAL_STREAMS(someStream1, output1); + ASSERT_EQUAL_STREAMS(someStream2, output2); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + program->Apply(&input)->Run(&output1); + }()), yexception, "cannot be used with multi-output programs"); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto outputs = TVector<IOutputStream*>({}); + program->Apply(&input)->Run(outputs); + }()), yexception, "Number of variant alternatives should match number of streams"); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto outputs = TVector<IOutputStream*>({&output1, &output1, &output1}); + program->Apply(&input)->Run(outputs); + }()), yexception, "Number of variant alternatives should match number of streams"); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto outputs = TMap<TString, IOutputStream*>(); + program->Apply(&input)->Run(outputs); + }()), yexception, "cannot be used to process variants over tuple"); + } + } + + Y_UNIT_TEST(TestMultiOutputOverStruct) { + using namespace NYql::NPureCalc; + + TVector<TString> fields {"int64", "uint64", "double", "bool", "string"}; + auto schema = GetSchema(fields); + auto stream = GET_STREAM(fields, {}, 0, 10, 1); + + TVector<TString> someFields1 {"bool", "int64", "string"}; + auto someSchema1 = GetSchema(someFields1); + auto someStream1 = GET_STREAM(someFields1, {}, 0, 10, 2); + + TVector<TString> someFields2 {"bool", "double"}; + auto someSchema2 = GetSchema(someFields2); + auto someStream2 = GET_STREAM(someFields2, {}, 1, 10, 2); + + auto factory = MakeProgramFactory(); + + { + auto inputSpec = INPUT_SPEC {schema}; + auto outputSpec = OUTPUT_SPEC 
{NYT::TNode::CreateEntity()}; + + auto program = CREATE_PROGRAM( + inputSpec, + outputSpec, + R"( +( + (let vt (ParseType '"Variant<A2:Struct<bool:Bool, double:Double>, A1:Struct<bool:Bool, int64:Int64, string:String>>")) + (return (Map (Self '0) (lambda '(x) (block '( + (let r1 (Variant (AsStruct '('bool (Member x 'bool)) '('int64 (Member x 'int64)) '('string (Member x 'string))) 'A1 vt)) + (let r2 (Variant (AsStruct '('bool (Member x 'bool)) '('double (Member x 'double))) 'A2 vt)) + (return (If (Coalesce (== (% (Member x 'int64) (Int64 '2)) (Int64 '0)) (Bool 'false)) r1 r2)) + ))))) +) + )", + ETranslationMode::SExpr + ); + + auto input = TStringStream(stream); + auto handle = program->Apply(&input); + TStringStream output1, output2; + auto outputs = TMap<TString, IOutputStream*>(); + outputs["A1"] = &output1; + outputs["A2"] = &output2; + handle->Run(outputs); + ASSERT_EQUAL_STREAMS(someStream1, output1); + ASSERT_EQUAL_STREAMS(someStream2, output2); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + program->Apply(&input)->Run(&output1); + }()), yexception, "cannot be used with multi-output programs"); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto outputs = TVector<IOutputStream*>({}); + program->Apply(&input)->Run(outputs); + }()), yexception, "cannot be used to process variants over struct"); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto outputs = TMap<TString, IOutputStream*>(); + outputs["A1"] = &output1; + program->Apply(&input)->Run(outputs); + }()), yexception, "Number of variant alternatives should match number of streams"); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto outputs = TMap<TString, IOutputStream*>(); + outputs["A1"] = &output1; + outputs["A2"] = &output1; + outputs["A3"] = &output1; + program->Apply(&input)->Run(outputs); + }()), yexception, "Number of variant alternatives should match number of streams"); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto outputs = TMap<TString, IOutputStream*>(); + outputs["A1"] = &output1; + 
outputs["B1"] = &output1; + program->Apply(&input)->Run(outputs); + }()), yexception, "Cannot find stream for alternative \"A2\""); + } + } + +#ifdef GET_STREAM_WITH_STRUCT + Y_UNIT_TEST(TestReadNativeStructs) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory( + TProgramFactoryOptions().SetNativeYtTypeFlags(NYql::NTCF_PRODUCTION) + ); + + auto runProgram = [&factory](bool sorted) -> TStringStream { + auto inputSchema = GET_SCHEMA_WITH_STRUCT(sorted); + + auto input0 = GET_STREAM_WITH_STRUCT(sorted, 0, 2); + auto input1 = GET_STREAM_WITH_STRUCT(sorted, 2, 4); + + auto inputSpec = INPUT_SPEC{{inputSchema, inputSchema}}.SetUseOriginalRowSpec(!sorted); + auto outputSpec = OUTPUT_SPEC{NYT::TNode::CreateEntity()}; + + auto program = CREATE_PROGRAM( + inputSpec, + outputSpec, + R"( +( + (return (Extend (Self '0) (Self '1))) +) + )", + ETranslationMode::SExpr + ); + + TStringStream result; + + auto handle = program->Apply(TVector<IInputStream*>({&input0, &input1})); + handle->Run(&result); + + return result; + }; + + auto etalon = GET_STREAM_WITH_STRUCT(true, 0, 4); + + auto output0 = runProgram(true); + auto output1 = runProgram(false); + + ASSERT_EQUAL_STREAMS(output0, etalon); + ASSERT_EQUAL_STREAMS(output1, etalon); + } +#endif + + Y_UNIT_TEST(TestIndependentProcessings) { + using namespace NYql::NPureCalc; + + TVector<TString> fields0 {"double", "int64", "string"}; // keep this sorted + auto schema0 = GetSchema(fields0); + auto stream0 = GET_STREAM(fields0, {}, 0, 10, 1); + + TVector<TString> someFields0 {"int64", "string"}; + auto someStream0 = GET_STREAM(someFields0, {}, 0, 10, 2); // sample with even int64 numbers + + TVector<TString> fields1 {"bool", "int64", "uint64"}; // keep this sorted + auto schema1 = GetSchema(fields1); + auto stream1 = GET_STREAM(fields1, {}, 0, 10, 1); + + TVector<TString> someFields1 {"int64", "uint64"}; + auto someStream1 = GET_STREAM(someFields1, {}, 1, 10, 2); // sample with odd int64 numbers + + auto factory = 
MakeProgramFactory(); + + { + auto inputSpec = INPUT_SPEC {{schema0, schema1}}; + auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()}; + + auto program = CREATE_PROGRAM( + inputSpec, + outputSpec, + R"( +( + (let $type (ParseType '"Variant<Struct<int64: Int64, string:String>, Struct<int64:Int64, uint64: Uint64>>")) + (let $stream0 (FlatMap (Self '0) (lambda '(x) (block '( + (let $item (Variant (AsStruct '('int64 (Member x 'int64)) '('string (Member x 'string))) '0 $type)) + (return (ListIf (Coalesce (== (% (Member x 'int64) (Int64 '2)) (Int64 '0)) (Bool 'false)) $item)) + ))))) + (let $stream1 (FlatMap (Self '1) (lambda '(x) (block '( + (let $item (Variant (AsStruct '('int64 (Member x 'int64)) '('uint64 (Member x 'uint64))) '1 $type)) + (return (ListIf (Coalesce (== (% (Member x 'int64) (Int64 '2)) (Int64 '1)) (Bool 'false)) $item)) + ))))) + (return (Extend $stream0 $stream1)) +) + )", + ETranslationMode::SExpr + ); + + UNIT_ASSERT_EQUAL(program->MakeInputSchema(0), schema0); + UNIT_ASSERT_EQUAL(program->MakeInputSchema(1), schema1); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto unused = program->MakeInputSchema(2); + }()), yexception, "invalid input index (2) in MakeInputSchema call"); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto unused = program->MakeInputSchema(); + }()), yexception, "MakeInputSchema() can be used only with single-input programs"); + + TStringStream input0(stream0); + TStringStream input1(stream1); + auto handle = program->Apply(TVector<IInputStream*>({&input0, &input1})); + TStringStream output0, output1; + handle->Run(TVector<IOutputStream*>({&output0, &output1})); + + ASSERT_EQUAL_STREAMS(someStream0, output0); + ASSERT_EQUAL_STREAMS(someStream1, output1); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto unused = program->Apply(TVector<IInputStream*>()); + }()), yexception, "number of input streams should match number of inputs"); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto unused = 
program->Apply(TVector<IInputStream*>({&input0})); + }()), yexception, "number of input streams should match number of inputs"); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + TStringStream input2; + auto unused = program->Apply(TVector<IInputStream*>({&input0, &input1, &input2})); + }()), yexception, "number of input streams should match number of inputs"); + + UNIT_ASSERT_EXCEPTION_CONTAINS(([&](){ + auto unused = program->Apply(&input0); + }()), yexception, "number of input streams should match number of inputs"); + } + } + + Y_UNIT_TEST(TestMergeInputs) { + using namespace NYql::NPureCalc; + + TVector<TString> fields0 {"double", "int64", "string", "uint64"}; // keep this sorted + auto schema0 = GetSchema(fields0); + auto stream0 = GET_STREAM(fields0, {}, 0, 5, 1); + + TVector<TString> fields1 {"double", "int64", "uint64", "yson"}; // keep this sorted + auto schema1 = GetSchema(fields1); + auto stream1 = GET_STREAM(fields1, {}, 5, 10, 1); + + TVector<TString> someFields {"double", "int64", "uint64"}; // keep this sorted + auto mergedStream = GET_STREAM(someFields, {}, 0, 10, 1); + auto mergedSchema = GetSchema(someFields); + + auto factory = MakeProgramFactory(); + + { + auto inputSpec = INPUT_SPEC {{schema0, schema1}}; + auto outputSpec = OUTPUT_SPEC {NYT::TNode::CreateEntity()}; + + auto program = CREATE_PROGRAM( + inputSpec, + outputSpec, + R"( +( + (let $stream0 (Map (Self '0) (lambda '(x) (RemoveMember x 'string)))) + (let $stream1 (Map (Self '1) (lambda '(x) (RemoveMember x 'yson)))) + (return (Extend $stream0 $stream1)) +) + )", + ETranslationMode::SExpr + ); + + UNIT_ASSERT_EQUAL(program->MakeInputSchema(0), schema0); + UNIT_ASSERT_EQUAL(program->MakeInputSchema(1), schema1); + UNIT_ASSERT_EQUAL(program->MakeFullOutputSchema(), mergedSchema); + + TStringStream input0(stream0); + TStringStream input1(stream1); + auto handle = program->Apply(TVector<IInputStream*>({&input0, &input1})); + TStringStream output; + handle->Run(&output); + + 
ASSERT_EQUAL_STREAMS(mergedStream, output); + } + } + + Y_UNIT_TEST(TestTableName) { + using namespace NYql::NPureCalc; + + TVector<TVector<int>> values = {{3, 5}}; + + auto inputSchema = GetSchema({"int64"}); + auto stream = GET_MULTITABLE_STREAM(values); + auto etalon = GET_MULTITABLE_STREAM(values, {"Input"}); + + auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true)); + + { + auto program = CREATE_PROGRAM( + INPUT_SPEC(inputSchema), + OUTPUT_SPEC(NYT::TNode::CreateEntity()), + "SELECT `int64`, TableName() AS `tname` FROM Input", + ETranslationMode::SQL + ); + + auto handle = program->Apply(&stream); + TStringStream output; + handle->Run(&output); + + ASSERT_EQUAL_STREAMS(output, etalon); + } + } + + Y_UNIT_TEST(TestCustomTableName) { + using namespace NYql::NPureCalc; + + TVector<TVector<int>> values = {{3, 5}, {2, 8}}; + TVector<TString> tableNames = {"One", "Two"}; + + auto inputSchema = GetSchema({"int64"}); + auto stream = GET_MULTITABLE_STREAM(values); + auto etalon = GET_MULTITABLE_STREAM(values, tableNames); + + auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true)); + + { + auto program = CREATE_PROGRAM( + INPUT_SPEC(inputSchema).SetTableNames(tableNames), + OUTPUT_SPEC(NYT::TNode::CreateEntity()), + "SELECT `int64`, TableName() AS `tname` FROM TABLES()", + ETranslationMode::SQL + ); + + auto handle = program->Apply(&stream); + TStringStream output; + handle->Run(&output); + + ASSERT_EQUAL_STREAMS(output, etalon); + } + } + +#ifdef PULL_LIST_MODE + Y_UNIT_TEST(TestMultiinputTableName) { + using namespace NYql::NPureCalc; + + TVector<TVector<int>> values0 = {{3, 5}}; + TVector<TVector<int>> values1 = {{7, 9}}; + + auto inputSchema = GetSchema({"int64"}); + auto stream0 = GET_MULTITABLE_STREAM(values0); + auto stream1 = GET_MULTITABLE_STREAM(values1); + auto etalon = GET_MULTITABLE_STREAM(JoinVectors(values0, values1), {"Input0", "Input1"}); + + auto factory = 
MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true)); + + { + auto program = CREATE_PROGRAM( + INPUT_SPEC({inputSchema, inputSchema}), + OUTPUT_SPEC(NYT::TNode::CreateEntity()), + R"( +$union = ( + SELECT * FROM Input0 + UNION ALL + SELECT * FROM Input1 +); +SELECT TableName() AS `tname`, `int64` FROM $union + )" + ); + + auto handle = program->Apply(TVector<IInputStream*>{&stream0, &stream1}); + TStringStream output; + handle->Run(&output); + + ASSERT_EQUAL_STREAMS(output, etalon); + } + } + + Y_UNIT_TEST(TestMultiinputCustomTableName) { + using namespace NYql::NPureCalc; + + TVector<TVector<int>> values0 = {{1, 4}, {2, 8}}; + TVector<TVector<int>> values1 = {{3, 5}, {7, 9}}; + TVector<TString> tableNames0 = {"OneA", "TwoA"}; + TVector<TString> tableNames1 = {"OneB", "TwoB"}; + + auto inputSchema = GetSchema({"int64"}); + auto stream0 = GET_MULTITABLE_STREAM(values0); + auto stream1 = GET_MULTITABLE_STREAM(values1); + auto etalon = GET_MULTITABLE_STREAM(JoinVectors(values0, values1), JoinVectors(tableNames0, tableNames1)); + + auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true)); + + { + auto program = CREATE_PROGRAM( + INPUT_SPEC({inputSchema, inputSchema}).SetTableNames(tableNames0, 0).SetTableNames(tableNames1, 1), + OUTPUT_SPEC(NYT::TNode::CreateEntity()), + R"( +$input0, $input1 = PROCESS TABLES(); +$union = ( + SELECT * FROM $input0 + UNION ALL + SELECT * FROM $input1 +); +SELECT TableName() AS `tname`, `int64` FROM $union + )" + ); + + auto handle = program->Apply(TVector<IInputStream*>{&stream0, &stream1}); + TStringStream output; + handle->Run(&output); + + ASSERT_EQUAL_STREAMS(output, etalon); + } + } +#endif +} diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp new file mode 100644 index 0000000000..255e815e8f --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/test_spec.cpp @@ -0,0 +1,325 @@ 
+#include <ydb/library/yql/providers/common/codec/yql_codec_type_flags.h> + +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/yson/writer.h> + +#include <library/cpp/yson/node/node.h> +#include <library/cpp/yson/node/node_io.h> +#include <library/cpp/yson/node/node_visitor.h> + +#include <ydb/library/yql/public/purecalc/common/interface.h> +#include <ydb/library/yql/public/purecalc/io_specs/mkql/spec.h> +#include <ydb/library/yql/public/purecalc/ut/lib/helpers.h> + +#include <util/generic/hash_set.h> +#include <util/generic/ptr.h> +#include <util/stream/str.h> + +#include <library/cpp/skiff/skiff.h> + +#include <util/generic/yexception.h> + + +namespace { + TStringStream GetYsonStream( + const TVector<TString>& fields, + const TVector<TString>& optionalFields={}, + ui32 start = 0, ui32 stop = 5, ui32 step = 1 + ) { + THashSet<TString> filter {fields.begin(), fields.end()}; + THashSet<TString> optionalFilter {optionalFields.begin(), optionalFields.end()}; + + auto addField = [&] ( + NYT::TNode& node, const TString& field, NYT::TNode&& value + ) { + if (filter.contains(field) && !optionalFilter.contains(field)) { + node(field, value); + } + }; + + TStringStream stream; + NYson::TYsonWriter writer(&stream, NYson::EYsonFormat::Binary, NYson::EYsonType::ListFragment); + NYT::TNodeVisitor visitor(&writer); + + for (ui32 i = start; i < stop; i += step) { + auto item = NYT::TNode::CreateMap(); + + addField(item, "int64", (i64)(i)); + addField(item, "uint64", (ui64)(i * 2)); + addField(item, "double", (double)(i * 3.5)); + addField(item, "bool", true); + addField(item, "string", "foo"); + addField(item, "yson", (i % 2 == 0 ? 
NYT::TNode(true) : NYT::TNode(false))); + + visitor.Visit(item); + } + + return stream; + } + + TStringStream GetMultitableYsonStream( + const TVector<TVector<int>>& groupedValues, + const TVector<TString>& etalonTableNames = {} + ) { + bool isEtalon = !etalonTableNames.empty(); + + Y_ENSURE(!isEtalon || groupedValues.size() == etalonTableNames.size()); + + TStringStream stream; + NYson::TYsonWriter writer(&stream, NYson::EYsonFormat::Binary, NYson::EYsonType::ListFragment); + NYT::TNodeVisitor visitor(&writer); + + for (ui64 tableIndex = 0; tableIndex < groupedValues.size(); ++tableIndex) { + if (!isEtalon) { + auto indexNode = NYT::TNode::CreateEntity(); + indexNode.Attributes() = NYT::TNode::CreateMap()("table_index", static_cast<i64>(tableIndex)); + visitor.Visit(indexNode); + } + + const auto& values = groupedValues[tableIndex]; + + for (ui64 i = 0; i < values.size(); ++i) { + auto item = NYT::TNode::CreateMap()("int64", values[i]); + if (isEtalon) { + item("tname", etalonTableNames[tableIndex]); + } + visitor.Visit(item); + } + } + + return stream; + } + + void AssertEqualYsonStreams(TStringStream etalonStream, TStringStream stream) { + NYT::TNode etalonList { + NYT::NodeFromYsonStream(&etalonStream, NYson::EYsonType::ListFragment) + }; + + NYT::TNode list { + NYT::NodeFromYsonStream(&stream, NYson::EYsonType::ListFragment) + }; + + UNIT_ASSERT_EQUAL(etalonList, list); + } + + TStringStream GetSkiffStream( + const TVector<TString>& fields, + const TVector<TString>& optionalFields={}, + ui32 start = 0, ui32 stop = 5, ui32 step = 1 + ) { + THashSet<TString> filter {fields.begin(), fields.end()}; + THashSet<TString> optionalFilter {optionalFields.begin(), optionalFields.end()}; + + TStringStream stream; + NSkiff::TUncheckedSkiffWriter writer {&stream}; + +#define WRITE_FIELD(field, type, value) \ + do { \ + if (filter.contains(field)) { \ + if (optionalFilter.contains(field)) { \ + writer.WriteVariant8Tag(0); \ + } else { \ + writer.Write ## type(value); \ + } \ 
+ } \ + } while (0) + + for (ui32 i = start; i < stop; i += step) { + auto item = NYT::TNode::CreateMap(); + + writer.WriteVariant16Tag(0); + WRITE_FIELD("bool", Boolean, true); + WRITE_FIELD("double", Double, (double)(i * 3.5)); + WRITE_FIELD("int64", Int64, (i64)(i)); + WRITE_FIELD("string", String32, "foo"); + WRITE_FIELD("uint64", Uint64, (ui64)(i * 2)); + WRITE_FIELD("yson", Yson32, (i % 2 == 0 ? "\x05" : "\x04")); // boolean values + } + +#undef WRITE_FIELD + + return stream; + } + + TStringStream GetMultitableSkiffStream( + const TVector<TVector<int>>& groupedValues, + const TVector<TString>& etalonTableNames = {} + ) { + bool isEtalon = !etalonTableNames.empty(); + + Y_ENSURE(!isEtalon || groupedValues.size() == etalonTableNames.size()); + + TStringStream stream; + NSkiff::TUncheckedSkiffWriter writer {&stream}; + + for (ui64 tableIndex = 0; tableIndex < groupedValues.size(); ++tableIndex) { + const auto& values = groupedValues[tableIndex]; + + for (ui64 i = 0; i < values.size(); ++i) { + if (isEtalon) { + writer.WriteVariant16Tag(0); + } else { + writer.WriteVariant16Tag(tableIndex); + } + + writer.WriteInt64(values[i]); + if (isEtalon) { + writer.WriteString32(etalonTableNames[tableIndex]); + } + } + } + + return stream; + } + + NYT::TNode GetSkiffSchemaWithStruct(bool sorted) { + auto aMember = NYT::TNode::CreateList() + .Add("a") + .Add(NYT::TNode::CreateList().Add("DataType").Add("String")); + + auto bMember = NYT::TNode::CreateList() + .Add("b") + .Add(NYT::TNode::CreateList().Add("DataType").Add("Uint64")); + + auto members = NYT::TNode::CreateList(); + + if (sorted) { + members.Add(std::move(aMember)).Add(std::move(bMember)); + } else { + members.Add(std::move(bMember)).Add(std::move(aMember)); + } + + auto structColumn = NYT::TNode::CreateList() + .Add("Struct") + .Add(NYT::TNode::CreateList().Add("StructType").Add(std::move(members))); + + auto indexColumn = NYT::TNode::CreateList() + .Add("Index") + 
.Add(NYT::TNode::CreateList().Add("DataType").Add("Uint64")); + + auto schema = NYT::TNode::CreateList() + .Add("StructType") + .Add(NYT::TNode::CreateList().Add(std::move(indexColumn)).Add(std::move(structColumn))); + + return schema; + } + + TStringStream GetSkiffStreamWithStruct(bool sorted, ui32 start = 0, ui32 stop = 5) { + TStringStream stream; + NSkiff::TUncheckedSkiffWriter writer {&stream}; + + auto writeStructMembers = [sorted, &writer](TStringBuf stringMember, ui64 numberMember) { + if (sorted) { + writer.WriteString32(stringMember); + writer.WriteUint64(numberMember); + } else { + writer.WriteUint64(numberMember); + writer.WriteString32(stringMember); + } + }; + + for (ui32 idx = start; idx < stop; ++idx) { + auto stringData = TStringBuilder{} << "text" << idx; + writer.WriteVariant16Tag(0); + writer.WriteUint64(idx); + writeStructMembers(stringData, idx + 3); + } + + return stream; + } + + void AssertEqualSkiffStreams(TStringStream etalonStream, TStringStream stream) { + UNIT_ASSERT_VALUES_EQUAL(etalonStream.Str(), stream.Str()); + } +} + +template <typename T> +TVector<T> JoinVectors(const TVector<T>& first, const TVector<T>& second) { + TVector<T> result; + result.reserve(first.size() + second.size()); + + result.insert(result.end(), first.begin(), first.end()); + result.insert(result.end(), second.begin(), second.end()); + + return result; +} + +#define PULL_STREAM_MODE +#define TEST_SUITE_NAME TestPullStreamYsonIO +#define CREATE_PROGRAM(...) 
factory->MakePullStreamProgram(__VA_ARGS__) +#define INPUT_SPEC TYsonInputSpec +#define OUTPUT_SPEC TYsonOutputSpec +#define GET_STREAM GetYsonStream +#define GET_MULTITABLE_STREAM GetMultitableYsonStream +#define ASSERT_EQUAL_STREAMS AssertEqualYsonStreams +#include "test.inl" +#undef ASSERT_EQUAL_STREAMS +#undef GET_MULTITABLE_STREAM +#undef GET_STREAM +#undef OUTPUT_SPEC +#undef INPUT_SPEC +#undef CREATE_PROGRAM +#undef TEST_SUITE_NAME +#undef PULL_STREAM_MODE + +#define PULL_STREAM_MODE +#define TEST_SUITE_NAME TestPullStreamSkiffIO +#define CREATE_PROGRAM(...) factory->MakePullStreamProgram(__VA_ARGS__) +#define INPUT_SPEC TSkiffInputSpec +#define OUTPUT_SPEC TSkiffOutputSpec +#define GET_STREAM GetSkiffStream +#define GET_STREAM_WITH_STRUCT GetSkiffStreamWithStruct +#define GET_SCHEMA_WITH_STRUCT GetSkiffSchemaWithStruct +#define GET_MULTITABLE_STREAM GetMultitableSkiffStream +#define ASSERT_EQUAL_STREAMS AssertEqualSkiffStreams +#include "test.inl" +#undef ASSERT_EQUAL_STREAMS +#undef GET_MULTITABLE_STREAM +#undef GET_SCHEMA_WITH_STRUCT +#undef GET_STREAM_WITH_STRUCT +#undef GET_STREAM +#undef OUTPUT_SPEC +#undef INPUT_SPEC +#undef CREATE_PROGRAM +#undef TEST_SUITE_NAME +#undef PULL_STREAM_MODE + +#define PULL_LIST_MODE +#define TEST_SUITE_NAME TestPullListYsonIO +#define CREATE_PROGRAM(...) factory->MakePullListProgram(__VA_ARGS__) +#define INPUT_SPEC TYsonInputSpec +#define OUTPUT_SPEC TYsonOutputSpec +#define GET_STREAM GetYsonStream +#define GET_MULTITABLE_STREAM GetMultitableYsonStream +#define ASSERT_EQUAL_STREAMS AssertEqualYsonStreams +#include "test.inl" +#undef ASSERT_EQUAL_STREAMS +#undef GET_MULTITABLE_STREAM +#undef GET_STREAM +#undef OUTPUT_SPEC +#undef INPUT_SPEC +#undef CREATE_PROGRAM +#undef TEST_SUITE_NAME +#undef PULL_LIST_MODE + +#define PULL_LIST_MODE +#define TEST_SUITE_NAME TestPullListSkiffIO +#define CREATE_PROGRAM(...) 
factory->MakePullListProgram(__VA_ARGS__) +#define INPUT_SPEC TSkiffInputSpec +#define OUTPUT_SPEC TSkiffOutputSpec +#define GET_STREAM GetSkiffStream +#define GET_STREAM_WITH_STRUCT GetSkiffStreamWithStruct +#define GET_SCHEMA_WITH_STRUCT GetSkiffSchemaWithStruct +#define GET_MULTITABLE_STREAM GetMultitableSkiffStream +#define ASSERT_EQUAL_STREAMS AssertEqualSkiffStreams +#include "test.inl" +#undef ASSERT_EQUAL_STREAMS +#undef GET_MULTITABLE_STREAM +#undef GET_SCHEMA_WITH_STRUCT +#undef GET_STREAM_WITH_STRUCT +#undef GET_STREAM +#undef OUTPUT_SPEC +#undef INPUT_SPEC +#undef CREATE_PROGRAM +#undef TEST_SUITE_NAME +#undef PULL_LIST_MODE diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ut/ya.make b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/ya.make new file mode 100644 index 0000000000..afc48d4356 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ut/ya.make @@ -0,0 +1,20 @@ +UNITTEST() + +SIZE(MEDIUM) + +TIMEOUT(300) + +PEERDIR( + ydb/library/yql/public/udf/service/exception_policy + ydb/library/yql/public/purecalc/common + ydb/library/yql/public/purecalc/io_specs/mkql + ydb/library/yql/public/purecalc/ut/lib +) + +YQL_LAST_ABI_VERSION() + +SRCS( + test_spec.cpp +) + +END() diff --git a/ydb/library/yql/public/purecalc/io_specs/mkql/ya.make b/ydb/library/yql/public/purecalc/io_specs/mkql/ya.make new file mode 100644 index 0000000000..b6066163f7 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/mkql/ya.make @@ -0,0 +1,25 @@ +LIBRARY() + +PEERDIR( + ydb/library/yql/public/purecalc/common + ydb/library/yql/providers/yt/codec + ydb/library/yql/providers/yt/common + ydb/library/yql/providers/yt/lib/mkql_helpers + ydb/library/yql/providers/common/codec + ydb/library/yql/providers/common/schema/mkql +) + + + YQL_LAST_ABI_VERSION() + + +SRCS( + spec.cpp + spec.h +) + +END() + +RECURSE_FOR_TESTS( + ut +) diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.darwin-x86_64.txt 
b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..b21a40ca76 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,24 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(ut) + +add_library(purecalc-io_specs-protobuf) +target_compile_options(purecalc-io_specs-protobuf PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-io_specs-protobuf PUBLIC + contrib-libs-cxxsupp + yutil + public-purecalc-common + purecalc-io_specs-protobuf_raw +) +target_sources(purecalc-io_specs-protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp +) diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..8dc53c6230 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.linux-aarch64.txt @@ -0,0 +1,25 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(ut) + +add_library(purecalc-io_specs-protobuf) +target_compile_options(purecalc-io_specs-protobuf PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-io_specs-protobuf PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + public-purecalc-common + purecalc-io_specs-protobuf_raw +) +target_sources(purecalc-io_specs-protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp +) diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..8dc53c6230 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.linux-x86_64.txt @@ -0,0 +1,25 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(ut) + +add_library(purecalc-io_specs-protobuf) +target_compile_options(purecalc-io_specs-protobuf PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-io_specs-protobuf PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + public-purecalc-common + purecalc-io_specs-protobuf_raw +) +target_sources(purecalc-io_specs-protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp +) diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..b21a40ca76 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/CMakeLists.windows-x86_64.txt @@ -0,0 +1,24 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(ut) + +add_library(purecalc-io_specs-protobuf) +target_compile_options(purecalc-io_specs-protobuf PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-io_specs-protobuf PUBLIC + contrib-libs-cxxsupp + yutil + public-purecalc-common + purecalc-io_specs-protobuf_raw +) +target_sources(purecalc-io_specs-protobuf PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp +) diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp b/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp new file mode 100644 index 0000000000..90f0b339ca --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.cpp @@ -0,0 +1 @@ +#include "proto_variant.h" diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.h b/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.h new file mode 100644 index 0000000000..c7d137d0e6 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/proto_variant.h @@ -0,0 +1,80 @@ +#pragma once + +#include <ydb/library/yql/public/purecalc/common/interface.h> + +#include <array> + +namespace NYql::NPureCalc::NPrivate { + using TProtoRawMultiOutput = std::pair<ui32, google::protobuf::Message*>; + + template <typename... T> + using TProtoMultiOutput = std::variant<T*...>; + + template <size_t I, typename... T> + using TProtoOutput = std::add_pointer_t<typename TTypeList<T...>::template TGet<I>>; + + template <size_t I, typename... T> + TProtoMultiOutput<T...> InitProtobufsVariant(google::protobuf::Message* ptr) { + static_assert(std::conjunction_v<std::is_base_of<google::protobuf::Message, T>...>); + return TProtoMultiOutput<T...>(std::in_place_index<I>, static_cast<TProtoOutput<I, T...>>(ptr)); + } + + template <typename... 
T> + class TProtobufsMappingBase { + public: + TProtobufsMappingBase() + : InitFuncs_(BuildInitFuncs(std::make_index_sequence<sizeof...(T)>())) + { + } + + private: + typedef TProtoMultiOutput<T...> (*initfunc)(google::protobuf::Message*); + + template <size_t... I> + inline std::array<initfunc, sizeof...(T)> BuildInitFuncs(std::index_sequence<I...>) { + return {&InitProtobufsVariant<I, T...>...}; + } + + protected: + const std::array<initfunc, sizeof...(T)> InitFuncs_; + }; + + template <typename... T> + class TProtobufsMappingStream: public IStream<TProtoMultiOutput<T...>>, public TProtobufsMappingBase<T...> { + public: + TProtobufsMappingStream(THolder<IStream<TProtoRawMultiOutput>> oldStream) + : OldStream_(std::move(oldStream)) + { + } + + public: + TProtoMultiOutput<T...> Fetch() override { + auto&& oldItem = OldStream_->Fetch(); + return this->InitFuncs_[oldItem.first](oldItem.second); + } + + private: + THolder<IStream<TProtoRawMultiOutput>> OldStream_; + }; + + template <typename... 
T> + class TProtobufsMappingConsumer: public IConsumer<TProtoRawMultiOutput>, public TProtobufsMappingBase<T...> { + public: + TProtobufsMappingConsumer(THolder<IConsumer<TProtoMultiOutput<T...>>> oldConsumer) + : OldConsumer_(std::move(oldConsumer)) + { + } + + public: + void OnObject(TProtoRawMultiOutput oldItem) override { + OldConsumer_->OnObject(this->InitFuncs_[oldItem.first](oldItem.second)); + } + + void OnFinish() override { + OldConsumer_->OnFinish(); + } + + private: + THolder<IConsumer<TProtoMultiOutput<T...>>> OldConsumer_; + }; +} diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp b/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp new file mode 100644 index 0000000000..91de6c290a --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.cpp @@ -0,0 +1 @@ +#include "spec.h" diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h b/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h new file mode 100644 index 0000000000..53a4a2f96e --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h @@ -0,0 +1,147 @@ +#pragma once + +#include "proto_variant.h" + +#include <ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.h> + +namespace NYql { + namespace NPureCalc { + /** + * Processing mode for working with non-raw protobuf messages. + * + * @tparam T message type. + */ + template <typename T> + class TProtobufInputSpec: public TProtobufRawInputSpec { + static_assert(std::is_base_of<google::protobuf::Message, T>::value, + "should be derived from google::protobuf::Message"); + public: + TProtobufInputSpec( + const TMaybe<TString>& timestampColumn = Nothing(), + const TProtoSchemaOptions& options = {} + ) + : TProtobufRawInputSpec(*T::descriptor(), timestampColumn, options) + { + } + }; + + /** + * Processing mode for working with non-raw protobuf messages. + * + * @tparam T message type. 
+ */ + template <typename T> + class TProtobufOutputSpec: public TProtobufRawOutputSpec { + static_assert(std::is_base_of<google::protobuf::Message, T>::value, + "should be derived from google::protobuf::Message"); + public: + TProtobufOutputSpec( + const TProtoSchemaOptions& options = {}, + google::protobuf::Arena* arena = nullptr + ) + : TProtobufRawOutputSpec(*T::descriptor(), nullptr, options, arena) + { + } + }; + + /** + * Processing mode for working with non-raw protobuf messages and several outputs. + */ + template <typename... T> + class TProtobufMultiOutputSpec: public TProtobufRawMultiOutputSpec { + static_assert( + std::conjunction_v<std::is_base_of<google::protobuf::Message, T>...>, + "all types should be derived from google::protobuf::Message"); + public: + TProtobufMultiOutputSpec( + const TProtoSchemaOptions& options = {}, + TMaybe<TVector<google::protobuf::Arena*>> arenas = {} + ) + : TProtobufRawMultiOutputSpec({T::descriptor()...}, Nothing(), options, std::move(arenas)) + { + } + }; + + template <typename T> + struct TInputSpecTraits<TProtobufInputSpec<T>> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = true; + static const constexpr bool SupportPullListMode = true; + static const constexpr bool SupportPushStreamMode = true; + + using TConsumerType = THolder<IConsumer<T*>>; + + static void PreparePullStreamWorker(const TProtobufInputSpec<T>& inputSpec, IPullStreamWorker* worker, THolder<IStream<T*>> stream) { + auto raw = ConvertStream<google::protobuf::Message*>(std::move(stream)); + TInputSpecTraits<TProtobufRawInputSpec>::PreparePullStreamWorker(inputSpec, worker, std::move(raw)); + } + + static void PreparePullListWorker(const TProtobufInputSpec<T>& inputSpec, IPullListWorker* worker, THolder<IStream<T*>> stream) { + auto raw = ConvertStream<google::protobuf::Message*>(std::move(stream)); + TInputSpecTraits<TProtobufRawInputSpec>::PreparePullListWorker(inputSpec, worker, 
std::move(raw)); + } + + static TConsumerType MakeConsumer(const TProtobufInputSpec<T>& inputSpec, TWorkerHolder<IPushStreamWorker> worker) { + auto raw = TInputSpecTraits<TProtobufRawInputSpec>::MakeConsumer(inputSpec, std::move(worker)); + return ConvertConsumer<T*>(std::move(raw)); + } + }; + + template <typename T> + struct TOutputSpecTraits<TProtobufOutputSpec<T>> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = true; + static const constexpr bool SupportPullListMode = true; + static const constexpr bool SupportPushStreamMode = true; + + using TOutputItemType = T*; + using TPullStreamReturnType = THolder<IStream<TOutputItemType>>; + using TPullListReturnType = THolder<IStream<TOutputItemType>>; + + static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufOutputSpec<T>& outputSpec, TWorkerHolder<IPullStreamWorker> worker) { + auto raw = TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullStreamWorkerToOutputType(outputSpec, std::move(worker)); + return ConvertStreamUnsafe<TOutputItemType>(std::move(raw)); + } + + static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufOutputSpec<T>& outputSpec, TWorkerHolder<IPullListWorker> worker) { + auto raw = TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullListWorkerToOutputType(outputSpec, std::move(worker)); + return ConvertStreamUnsafe<TOutputItemType>(std::move(raw)); + } + + static void SetConsumerToWorker(const TProtobufOutputSpec<T>& outputSpec, IPushStreamWorker* worker, THolder<IConsumer<T*>> consumer) { + auto raw = ConvertConsumerUnsafe<google::protobuf::Message*>(std::move(consumer)); + TOutputSpecTraits<TProtobufRawOutputSpec>::SetConsumerToWorker(outputSpec, worker, std::move(raw)); + } + }; + + template <typename... 
T> + struct TOutputSpecTraits<TProtobufMultiOutputSpec<T...>> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = true; + static const constexpr bool SupportPullListMode = true; + static const constexpr bool SupportPushStreamMode = true; + + using TOutputItemType = std::variant<T*...>; + using TPullStreamReturnType = THolder<IStream<TOutputItemType>>; + using TPullListReturnType = THolder<IStream<TOutputItemType>>; + + static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufMultiOutputSpec<T...>& outputSpec, TWorkerHolder<IPullStreamWorker> worker) { + auto raw = TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullStreamWorkerToOutputType(outputSpec, std::move(worker)); + return THolder(new NPrivate::TProtobufsMappingStream<T...>(std::move(raw))); + } + + static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufMultiOutputSpec<T...>& outputSpec, TWorkerHolder<IPullListWorker> worker) { + auto raw = TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullListWorkerToOutputType(outputSpec, std::move(worker)); + return THolder(new NPrivate::TProtobufsMappingStream<T...>(std::move(raw))); + } + + static void SetConsumerToWorker(const TProtobufMultiOutputSpec<T...>& outputSpec, IPushStreamWorker* worker, THolder<IConsumer<TOutputItemType>> consumer) { + auto wrapper = MakeHolder<NPrivate::TProtobufsMappingConsumer<T...>>(std::move(consumer)); + TOutputSpecTraits<TProtobufRawMultiOutputSpec>::SetConsumerToWorker(outputSpec, worker, std::move(wrapper)); + } + }; + } +} diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..b5599ef496 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,71 @@ + +# This file was generated by the build system used 
internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_executable(yql-public-purecalc-io_specs-protobuf-ut) +target_compile_options(yql-public-purecalc-io_specs-protobuf-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(yql-public-purecalc-io_specs-protobuf-ut PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + cpp-protobuf-util + udf-service-exception_policy + public-purecalc-common + purecalc-io_specs-protobuf + purecalc-ut-protos +) +target_link_options(yql-public-purecalc-io_specs-protobuf-ut PRIVATE + -Wl,-platform_version,macos,11.0,11.0 + -fPIC + -fPIC + -framework + CoreFoundation +) +target_sources(yql-public-purecalc-io_specs-protobuf-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp +) +set_property( + TARGET + yql-public-purecalc-io_specs-protobuf-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + yql-public-purecalc-io_specs-protobuf-ut + TEST_TARGET + yql-public-purecalc-io_specs-protobuf-ut + TEST_ARG + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + yql-public-purecalc-io_specs-protobuf-ut + PROPERTY + LABELS + MEDIUM +) +set_yunittest_property( + TEST + yql-public-purecalc-io_specs-protobuf-ut + PROPERTY + PROCESSORS + 1 +) +target_allocator(yql-public-purecalc-io_specs-protobuf-ut + system_allocator +) +vcs_info(yql-public-purecalc-io_specs-protobuf-ut) diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.linux-aarch64.txt new file mode 
100644 index 0000000000..2ff8d5a7fc --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.linux-aarch64.txt @@ -0,0 +1,74 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_executable(yql-public-purecalc-io_specs-protobuf-ut) +target_compile_options(yql-public-purecalc-io_specs-protobuf-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(yql-public-purecalc-io_specs-protobuf-ut PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-testing-unittest_main + cpp-protobuf-util + udf-service-exception_policy + public-purecalc-common + purecalc-io_specs-protobuf + purecalc-ut-protos +) +target_link_options(yql-public-purecalc-io_specs-protobuf-ut PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(yql-public-purecalc-io_specs-protobuf-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp +) +set_property( + TARGET + yql-public-purecalc-io_specs-protobuf-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + yql-public-purecalc-io_specs-protobuf-ut + TEST_TARGET + yql-public-purecalc-io_specs-protobuf-ut + TEST_ARG + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + yql-public-purecalc-io_specs-protobuf-ut + PROPERTY + LABELS + MEDIUM +) +set_yunittest_property( + TEST + yql-public-purecalc-io_specs-protobuf-ut + PROPERTY + PROCESSORS + 1 +) +target_allocator(yql-public-purecalc-io_specs-protobuf-ut + cpp-malloc-jemalloc +) 
+vcs_info(yql-public-purecalc-io_specs-protobuf-ut) diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..2defc0237f --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.linux-x86_64.txt @@ -0,0 +1,76 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_executable(yql-public-purecalc-io_specs-protobuf-ut) +target_compile_options(yql-public-purecalc-io_specs-protobuf-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(yql-public-purecalc-io_specs-protobuf-ut PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + cpp-protobuf-util + udf-service-exception_policy + public-purecalc-common + purecalc-io_specs-protobuf + purecalc-ut-protos +) +target_link_options(yql-public-purecalc-io_specs-protobuf-ut PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(yql-public-purecalc-io_specs-protobuf-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp +) +set_property( + TARGET + yql-public-purecalc-io_specs-protobuf-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + yql-public-purecalc-io_specs-protobuf-ut + TEST_TARGET + yql-public-purecalc-io_specs-protobuf-ut + TEST_ARG + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + 
yql-public-purecalc-io_specs-protobuf-ut + PROPERTY + LABELS + MEDIUM +) +set_yunittest_property( + TEST + yql-public-purecalc-io_specs-protobuf-ut + PROPERTY + PROCESSORS + 1 +) +target_allocator(yql-public-purecalc-io_specs-protobuf-ut + cpp-malloc-tcmalloc + libs-tcmalloc-no_percpu_cache +) +vcs_info(yql-public-purecalc-io_specs-protobuf-ut) diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..4e2687f660 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/CMakeLists.windows-x86_64.txt @@ -0,0 +1,64 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_executable(yql-public-purecalc-io_specs-protobuf-ut) +target_compile_options(yql-public-purecalc-io_specs-protobuf-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(yql-public-purecalc-io_specs-protobuf-ut PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + cpp-protobuf-util + udf-service-exception_policy + public-purecalc-common + purecalc-io_specs-protobuf + purecalc-ut-protos +) +target_sources(yql-public-purecalc-io_specs-protobuf-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp +) +set_property( + TARGET + yql-public-purecalc-io_specs-protobuf-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + yql-public-purecalc-io_specs-protobuf-ut + TEST_TARGET + yql-public-purecalc-io_specs-protobuf-ut + TEST_ARG + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + yql-public-purecalc-io_specs-protobuf-ut + PROPERTY + LABELS + MEDIUM +) +set_yunittest_property( + TEST + yql-public-purecalc-io_specs-protobuf-ut + PROPERTY + PROCESSORS + 1 +) +target_allocator(yql-public-purecalc-io_specs-protobuf-ut + system_allocator +) +vcs_info(yql-public-purecalc-io_specs-protobuf-ut) diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp new file mode 100644 index 0000000000..384e617016 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/test_spec.cpp @@ -0,0 +1,995 @@ +#include <library/cpp/testing/unittest/registar.h> + +#include <ydb/library/yql/public/purecalc/common/interface.h> +#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h> +#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h> +#include <library/cpp/protobuf/util/pb_io.h> +#include <util/generic/xrange.h> + +namespace { + TMaybe<NPureCalcProto::TAllTypes> allTypesMessage; + 
+ NPureCalcProto::TAllTypes& GetCanonicalMessage() { + if (!allTypesMessage) { + allTypesMessage = NPureCalcProto::TAllTypes(); + + allTypesMessage->SetFDouble(1); + allTypesMessage->SetFFloat(2); + allTypesMessage->SetFInt64(3); + allTypesMessage->SetFSfixed64(4); + allTypesMessage->SetFSint64(5); + allTypesMessage->SetFUint64(6); + allTypesMessage->SetFFixed64(7); + allTypesMessage->SetFInt32(8); + allTypesMessage->SetFSfixed32(9); + allTypesMessage->SetFSint32(10); + allTypesMessage->SetFUint32(11); + allTypesMessage->SetFFixed32(12); + allTypesMessage->SetFBool(true); + allTypesMessage->SetFString("asd"); + allTypesMessage->SetFBytes("dsa"); + } + + return allTypesMessage.GetRef(); + } + + template <typename T1, typename T2> + void AssertEqualToCanonical(const T1& got, const T2& expected) { + UNIT_ASSERT_EQUAL(expected.GetFDouble(), got.GetFDouble()); + UNIT_ASSERT_EQUAL(expected.GetFFloat(), got.GetFFloat()); + UNIT_ASSERT_EQUAL(expected.GetFInt64(), got.GetFInt64()); + UNIT_ASSERT_EQUAL(expected.GetFSfixed64(), got.GetFSfixed64()); + UNIT_ASSERT_EQUAL(expected.GetFSint64(), got.GetFSint64()); + UNIT_ASSERT_EQUAL(expected.GetFUint64(), got.GetFUint64()); + UNIT_ASSERT_EQUAL(expected.GetFFixed64(), got.GetFFixed64()); + UNIT_ASSERT_EQUAL(expected.GetFInt32(), got.GetFInt32()); + UNIT_ASSERT_EQUAL(expected.GetFSfixed32(), got.GetFSfixed32()); + UNIT_ASSERT_EQUAL(expected.GetFSint32(), got.GetFSint32()); + UNIT_ASSERT_EQUAL(expected.GetFUint32(), got.GetFUint32()); + UNIT_ASSERT_EQUAL(expected.GetFFixed32(), got.GetFFixed32()); + UNIT_ASSERT_EQUAL(expected.GetFBool(), got.GetFBool()); + UNIT_ASSERT_EQUAL(expected.GetFString(), got.GetFString()); + UNIT_ASSERT_EQUAL(expected.GetFBytes(), got.GetFBytes()); + } + + template <typename T> + void AssertEqualToCanonical(const T& got) { + AssertEqualToCanonical(got, GetCanonicalMessage()); + } + + TString SerializeToTextFormatAsString(const google::protobuf::Message& message) { + TString result; + { + TStringOutput 
output(result); + SerializeToTextFormat(message, output); + } + return result; + } + + template <typename T> + void AssertProtoEqual(const T& actual, const T& expected) { + UNIT_ASSERT_VALUES_EQUAL(SerializeToTextFormatAsString(actual), SerializeToTextFormatAsString(expected)); + } +} + +class TAllTypesStreamImpl: public NYql::NPureCalc::IStream<NPureCalcProto::TAllTypes*> { +private: + int I_ = 0; + NPureCalcProto::TAllTypes Message_ = GetCanonicalMessage(); + +public: + NPureCalcProto::TAllTypes* Fetch() override { + if (I_ > 0) { + return nullptr; + } else { + I_ += 1; + return &Message_; + } + } +}; + +class TSimpleMessageStreamImpl: public NYql::NPureCalc::IStream<NPureCalcProto::TSimpleMessage*> { +public: + TSimpleMessageStreamImpl(i32 value) + { + Message_.SetX(value); + } + + NPureCalcProto::TSimpleMessage* Fetch() override { + if (Exhausted_) { + return nullptr; + } else { + Exhausted_ = true; + return &Message_; + } + } + +private: + NPureCalcProto::TSimpleMessage Message_; + bool Exhausted_ = false; +}; + +class TAllTypesConsumerImpl: public NYql::NPureCalc::IConsumer<NPureCalcProto::TAllTypes*> { +private: + int I_ = 0; + +public: + void OnObject(NPureCalcProto::TAllTypes* t) override { + I_ += 1; + AssertEqualToCanonical(*t); + } + + void OnFinish() override { + UNIT_ASSERT(I_ > 0); + } +}; + +class TStringMessageStreamImpl: public NYql::NPureCalc::IStream<NPureCalcProto::TStringMessage*> { +private: + int I_ = 0; + NPureCalcProto::TStringMessage Message_{}; + +public: + NPureCalcProto::TStringMessage* Fetch() override { + if (I_ >= 3) { + return nullptr; + } else { + Message_.SetX(TString("-") * I_); + I_ += 1; + return &Message_; + } + } +}; + +class TSimpleMessageConsumerImpl: public NYql::NPureCalc::IConsumer<NPureCalcProto::TSimpleMessage*> { +private: + TVector<int>* Buf_; + +public: + TSimpleMessageConsumerImpl(TVector<int>* buf) + : Buf_(buf) + { + } + +public: + void OnObject(NPureCalcProto::TSimpleMessage* t) override { + 
Buf_->push_back(t->GetX()); + } + + void OnFinish() override { + Buf_->push_back(-100); + } +}; + +using TMessagesVariant = std::variant<NPureCalcProto::TSplitted1*, NPureCalcProto::TSplitted2*, NPureCalcProto::TStringMessage*>; + +class TVariantConsumerImpl: public NYql::NPureCalc::IConsumer<TMessagesVariant> { +public: + using TType0 = TVector<std::pair<i32, TString>>; + using TType1 = TVector<std::pair<ui32, TString>>; + using TType2 = TVector<TString>; + +public: + TVariantConsumerImpl(TType0* q0, TType1* q1, TType2* q2, int* v) + : Queue0_(q0) + , Queue1_(q1) + , Queue2_(q2) + , Value_(v) + { + } + + void OnObject(TMessagesVariant value) override { + if (auto* p = std::get_if<0>(&value)) { + Queue0_->push_back({(*p)->GetBInt(), std::move(*(*p)->MutableBString())}); + } else if (auto* p = std::get_if<1>(&value)) { + Queue1_->push_back({(*p)->GetCUint(), std::move(*(*p)->MutableCString())}); + } else if (auto* p = std::get_if<2>(&value)) { + Queue2_->push_back(std::move(*(*p)->MutableX())); + } else { + Y_FAIL("invalid variant alternative"); + } + } + + void OnFinish() override { + *Value_ = 42; + } + +private: + TType0* Queue0_; + TType1* Queue1_; + TType2* Queue2_; + int* Value_; +}; + +class TUnsplittedStreamImpl: public NYql::NPureCalc::IStream<NPureCalcProto::TUnsplitted*> { +public: + TUnsplittedStreamImpl() + { + Message_.SetAInt(-23); + Message_.SetAUint(111); + Message_.SetAString("Hello!"); + } + +public: + NPureCalcProto::TUnsplitted* Fetch() override { + switch (I_) { + case 0: + ++I_; + return &Message_; + case 1: + ++I_; + Message_.SetABool(false); + return &Message_; + case 2: + ++I_; + Message_.SetABool(true); + return &Message_; + default: + return nullptr; + } + } + +private: + NPureCalcProto::TUnsplitted Message_; + ui32 I_ = 0; +}; + +template<typename T> +struct TVectorConsumer: public NYql::NPureCalc::IConsumer<T*> { + TVector<T> Data; + + void OnObject(T* t) override { + Data.push_back(*t); + } + + void OnFinish() override { + } +}; + 
+template <typename T> +struct TVectorStream: public NYql::NPureCalc::IStream<T*> { + TVector<T> Data; + size_t Index = 0; + +public: + T* Fetch() override { + return Index < Data.size() ? &Data[Index++] : nullptr; + } +}; + +Y_UNIT_TEST_SUITE(TestProtoIO) { + Y_UNIT_TEST(TestAllTypes) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + { + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TAllTypes>(), + TProtobufOutputSpec<NPureCalcProto::TAllTypes>(), + "SELECT * FROM Input", + ETranslationMode::SQL + ); + + auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>()); + + NPureCalcProto::TAllTypes* message; + + UNIT_ASSERT(message = stream->Fetch()); + AssertEqualToCanonical(*message); + UNIT_ASSERT(!stream->Fetch()); + } + + { + auto program = factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TAllTypes>(), + TProtobufOutputSpec<NPureCalcProto::TAllTypes>(), + "SELECT * FROM Input", + ETranslationMode::SQL + ); + + auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>()); + + NPureCalcProto::TAllTypes* message; + + UNIT_ASSERT(message = stream->Fetch()); + AssertEqualToCanonical(*message); + UNIT_ASSERT(!stream->Fetch()); + } + + { + auto program = factory->MakePushStreamProgram( + TProtobufInputSpec<NPureCalcProto::TAllTypes>(), + TProtobufOutputSpec<NPureCalcProto::TAllTypes>(), + "SELECT * FROM Input", + ETranslationMode::SQL + ); + + auto consumer = program->Apply(MakeHolder<TAllTypesConsumerImpl>()); + + UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnObject(&GetCanonicalMessage()); }()); + UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnFinish(); }()); + } + } + + template <typename T> + void CheckPassThroughYql(T& testInput, google::protobuf::Arena* arena = nullptr) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + { + auto program = factory->MakePushStreamProgram( + TProtobufInputSpec<T>(), + TProtobufOutputSpec<T>({}, arena), + "SELECT * 
FROM Input", + ETranslationMode::SQL + ); + + auto resultConsumer = MakeHolder<TVectorConsumer<T>>(); + auto* resultConsumerPtr = resultConsumer.Get(); + auto sourceConsumer = program->Apply(std::move(resultConsumer)); + + sourceConsumer->OnObject(&testInput); + UNIT_ASSERT_VALUES_EQUAL(1, resultConsumerPtr->Data.size()); + AssertProtoEqual(resultConsumerPtr->Data[0], testInput); + + resultConsumerPtr->Data.clear(); + sourceConsumer->OnObject(&testInput); + UNIT_ASSERT_VALUES_EQUAL(1, resultConsumerPtr->Data.size()); + AssertProtoEqual(resultConsumerPtr->Data[0], testInput); + } + + { + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<T>(), + TProtobufOutputSpec<T>({}, arena), + "SELECT * FROM Input", + ETranslationMode::SQL + ); + + auto sourceStream = MakeHolder<TVectorStream<T>>(); + auto* sourceStreamPtr = sourceStream.Get(); + auto resultStream = program->Apply(std::move(sourceStream)); + + sourceStreamPtr->Data.push_back(testInput); + T* resultMessage; + UNIT_ASSERT(resultMessage = resultStream->Fetch()); + AssertProtoEqual(*resultMessage, testInput); + UNIT_ASSERT(!resultStream->Fetch()); + + UNIT_ASSERT_VALUES_EQUAL(resultMessage->GetArena(), arena); + + if (arena != nullptr) { + arena->Reset(); + } + } + + { + auto program = factory->MakePullListProgram( + TProtobufInputSpec<T>(), + TProtobufOutputSpec<T>({}, arena), + "SELECT * FROM Input", + ETranslationMode::SQL + ); + + auto sourceStream = MakeHolder<TVectorStream<T>>(); + auto* sourceStreamPtr = sourceStream.Get(); + auto resultStream = program->Apply(std::move(sourceStream)); + + sourceStreamPtr->Data.push_back(testInput); + T* resultMessage; + UNIT_ASSERT(resultMessage = resultStream->Fetch()); + AssertProtoEqual(*resultMessage, testInput); + UNIT_ASSERT(!resultStream->Fetch()); + + UNIT_ASSERT_VALUES_EQUAL(resultMessage->GetArena(), arena); + + if (arena != nullptr) { + arena->Reset(); + } + } + } + + template <typename T> + void CheckMessageIsInvalid(const TString& 
expectedExceptionMessage) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePushStreamProgram(TProtobufInputSpec<T>(), TProtobufOutputSpec<T>(), "SELECT * FROM Input", ETranslationMode::SQL); + }(), yexception, expectedExceptionMessage); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullStreamProgram(TProtobufInputSpec<T>(), TProtobufOutputSpec<T>(), "SELECT * FROM Input", ETranslationMode::SQL); + }(), yexception, expectedExceptionMessage); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullListProgram(TProtobufInputSpec<T>(), TProtobufOutputSpec<T>(), "SELECT * FROM Input", ETranslationMode::SQL); + }(), yexception, expectedExceptionMessage); + } + + Y_UNIT_TEST(TestSimpleNested) { + NPureCalcProto::TSimpleNested input; + input.SetX(10); + { + auto* item = input.MutableY(); + *item = GetCanonicalMessage(); + item->SetFUint64(100); + } + CheckPassThroughYql(input); + } + + Y_UNIT_TEST(TestOptionalNested) { + NPureCalcProto::TOptionalNested input; + { + auto* item = input.MutableX(); + *item = GetCanonicalMessage(); + item->SetFUint64(100); + } + CheckPassThroughYql(input); + } + + Y_UNIT_TEST(TestSimpleRepeated) { + NPureCalcProto::TSimpleRepeated input; + input.SetX(20); + input.AddY(100); + input.AddY(200); + input.AddY(300); + CheckPassThroughYql(input); + } + + Y_UNIT_TEST(TestNestedRepeated) { + NPureCalcProto::TNestedRepeated input; + input.SetX(20); + { + auto* item = input.MutableY()->Add(); + item->SetX(100); + { + auto* y = item->MutableY(); + *y = GetCanonicalMessage(); + y->SetFUint64(1000); + } + } + { + auto* item = input.MutableY()->Add(); + item->SetX(200); + { + auto* y = item->MutableY(); + *y = GetCanonicalMessage(); + y->SetFUint64(2000); + } + } + CheckPassThroughYql(input); + } + + Y_UNIT_TEST(TestMessageWithEnum) { + NPureCalcProto::TMessageWithEnum input; + input.AddEnumValue(NPureCalcProto::TMessageWithEnum::VALUE1); + 
input.AddEnumValue(NPureCalcProto::TMessageWithEnum::VALUE2); + CheckPassThroughYql(input); + } + + Y_UNIT_TEST(TestRecursive) { + CheckMessageIsInvalid<NPureCalcProto::TRecursive>("NPureCalcProto.TRecursive->NPureCalcProto.TRecursive"); + } + + Y_UNIT_TEST(TestRecursiveIndirectly) { + CheckMessageIsInvalid<NPureCalcProto::TRecursiveIndirectly>( + "NPureCalcProto.TRecursiveIndirectly->NPureCalcProto.TRecursiveIndirectly.TNested->NPureCalcProto.TRecursiveIndirectly"); + } + + Y_UNIT_TEST(TestColumnsFilter) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + auto filter = THashSet<TString>({"FFixed64", "FBool", "FBytes"}); + + NPureCalcProto::TOptionalAllTypes canonicalMessage; + canonicalMessage.SetFFixed64(GetCanonicalMessage().GetFFixed64()); + canonicalMessage.SetFBool(GetCanonicalMessage().GetFBool()); + canonicalMessage.SetFBytes(GetCanonicalMessage().GetFBytes()); + + { + auto inputSpec = TProtobufInputSpec<NPureCalcProto::TAllTypes>(); + auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(); + outputSpec.SetOutputColumnsFilter(filter); + + auto program = factory->MakePullStreamProgram( + inputSpec, + outputSpec, + "SELECT * FROM Input", + ETranslationMode::SQL + ); + + UNIT_ASSERT_EQUAL(program->GetUsedColumns(), filter); + + auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>()); + + NPureCalcProto::TOptionalAllTypes* message; + + UNIT_ASSERT(message = stream->Fetch()); + AssertEqualToCanonical(*message, canonicalMessage); + UNIT_ASSERT(!stream->Fetch()); + } + } + + Y_UNIT_TEST(TestColumnsFilterWithOptionalFields) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + auto fields = THashSet<TString>({"FFixed64", "FBool", "FBytes"}); + + NPureCalcProto::TOptionalAllTypes canonicalMessage; + canonicalMessage.SetFFixed64(GetCanonicalMessage().GetFFixed64()); + canonicalMessage.SetFBool(GetCanonicalMessage().GetFBool()); + 
canonicalMessage.SetFBytes(GetCanonicalMessage().GetFBytes()); + + { + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TAllTypes>(), + TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(), + "SELECT FFixed64, FBool, FBytes FROM Input", + ETranslationMode::SQL + ); + + UNIT_ASSERT_EQUAL(program->GetUsedColumns(), fields); + + auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>()); + + NPureCalcProto::TOptionalAllTypes* message; + + UNIT_ASSERT(message = stream->Fetch()); + AssertEqualToCanonical(*message, canonicalMessage); + UNIT_ASSERT(!stream->Fetch()); + } + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TAllTypes>(), + TProtobufOutputSpec<NPureCalcProto::TAllTypes>(), + "SELECT FFixed64, FBool, FBytes FROM Input", + ETranslationMode::SQL + ); + }(), TCompileError, "Failed to optimize"); + } + + Y_UNIT_TEST(TestUsedColumns) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + auto allFields = THashSet<TString>(); + + for (auto i: xrange(NPureCalcProto::TOptionalAllTypes::descriptor()->field_count())) { + allFields.emplace(NPureCalcProto::TOptionalAllTypes::descriptor()->field(i)->name()); + } + + { + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TAllTypes>(), + TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(), + "SELECT * FROM Input", + ETranslationMode::SQL + ); + + UNIT_ASSERT_EQUAL(program->GetUsedColumns(), allFields); + } + } + + Y_UNIT_TEST(TestChaining) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + TString sql1 = "SELECT UNWRAP(X || CAST(\"HI\" AS Utf8)) AS X FROM Input"; + TString sql2 = "SELECT LENGTH(X) AS X FROM Input"; + + { + auto program1 = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + sql1, + ETranslationMode::SQL + ); + + auto 
program2 = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(), + sql2, + ETranslationMode::SQL + ); + + auto input = MakeHolder<TStringMessageStreamImpl>(); + auto intermediate = program1->Apply(std::move(input)); + auto output = program2->Apply(std::move(intermediate)); + + TVector<int> expected = {2, 3, 4}; + TVector<int> actual{}; + + while (auto *x = output->Fetch()) { + actual.push_back(x->GetX()); + } + + UNIT_ASSERT_EQUAL(expected, actual); + } + + { + auto program1 = factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + sql1, + ETranslationMode::SQL + ); + + auto program2 = factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(), + sql2, + ETranslationMode::SQL + ); + + auto input = MakeHolder<TStringMessageStreamImpl>(); + auto intermediate = program1->Apply(std::move(input)); + auto output = program2->Apply(std::move(intermediate)); + + TVector<int> expected = {2, 3, 4}; + TVector<int> actual{}; + + while (auto *x = output->Fetch()) { + actual.push_back(x->GetX()); + } + + UNIT_ASSERT_EQUAL(expected, actual); + } + + { + auto program1 = factory->MakePushStreamProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + sql1, + ETranslationMode::SQL + ); + + auto program2 = factory->MakePushStreamProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(), + sql2, + ETranslationMode::SQL + ); + + TVector<int> expected = {2, 3, 4, -100}; + TVector<int> actual{}; + + auto consumer = MakeHolder<TSimpleMessageConsumerImpl>(&actual); + auto intermediate = program2->Apply(std::move(consumer)); + auto input = program1->Apply(std::move(intermediate)); + + 
NPureCalcProto::TStringMessage Message; + + Message.SetX(""); + input->OnObject(&Message); + + Message.SetX("1"); + input->OnObject(&Message); + + Message.SetX("22"); + input->OnObject(&Message); + + input->OnFinish(); + + UNIT_ASSERT_EQUAL(expected, actual); + } + } + + Y_UNIT_TEST(TestTimestampColumn) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(TProgramFactoryOptions() + .SetDeterministicTimeProviderSeed(1)); // seconds + + NPureCalcProto::TOptionalAllTypes canonicalMessage; + + { + auto inputSpec = TProtobufInputSpec<NPureCalcProto::TAllTypes>("MyTimestamp"); + auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(); + + auto program = factory->MakePullStreamProgram( + inputSpec, + outputSpec, + "SELECT MyTimestamp AS FFixed64 FROM Input", + ETranslationMode::SQL + ); + + auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>()); + + NPureCalcProto::TOptionalAllTypes* message; + + UNIT_ASSERT(message = stream->Fetch()); + UNIT_ASSERT_VALUES_EQUAL(message->GetFFixed64(), 1000000); // microseconds + UNIT_ASSERT(!stream->Fetch()); + } + } + + Y_UNIT_TEST(TestTableNames) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true)); + + auto runTest = [&](TStringBuf tableName, i32 value) { + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TSimpleMessage>(), + TProtobufOutputSpec<NPureCalcProto::TNamedSimpleMessage>(), + TString::Join("SELECT TableName() AS Name, X FROM ", tableName), + ETranslationMode::SQL + ); + + auto stream = program->Apply(MakeHolder<TSimpleMessageStreamImpl>(value)); + auto message = stream->Fetch(); + + UNIT_ASSERT(message); + UNIT_ASSERT_VALUES_EQUAL(message->GetX(), value); + UNIT_ASSERT_VALUES_EQUAL(message->GetName(), tableName); + UNIT_ASSERT(!stream->Fetch()); + }; + + runTest("Input", 37); + runTest("Input0", -23); + } + + void 
CheckMultiOutputs(TMaybe<TVector<google::protobuf::Arena*>> arenas) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + TString sExpr = R"( +( + (let $type (ParseType '"Variant<Struct<BInt:Int32,BString:Utf8>, Struct<CUint:Uint32,CString:Utf8>, Struct<X:Utf8>>")) + (let $stream (Self '0)) + (return (FlatMap (Self '0) (lambda '(x) (block '( + (let $cond (Member x 'ABool)) + (let $item0 (Variant (AsStruct '('BInt (Member x 'AInt)) '('BString (Member x 'AString))) '0 $type)) + (let $item1 (Variant (AsStruct '('CUint (Member x 'AUint)) '('CString (Member x 'AString))) '1 $type)) + (let $item2 (Variant (AsStruct '('X (Utf8 'Error))) '2 $type)) + (return (If (Exists $cond) (If (Unwrap $cond) (AsList $item0) (AsList $item1)) (AsList $item2))) + ))))) +) + )"; + + { + auto program = factory->MakePushStreamProgram( + TProtobufInputSpec<NPureCalcProto::TUnsplitted>(), + TProtobufMultiOutputSpec<NPureCalcProto::TSplitted1, NPureCalcProto::TSplitted2, NPureCalcProto::TStringMessage>( + {}, arenas + ), + sExpr, + ETranslationMode::SExpr + ); + + TVariantConsumerImpl::TType0 queue0; + TVariantConsumerImpl::TType1 queue1; + TVariantConsumerImpl::TType2 queue2; + int finalValue = 0; + + auto consumer = MakeHolder<TVariantConsumerImpl>(&queue0, &queue1, &queue2, &finalValue); + auto input = program->Apply(std::move(consumer)); + + NPureCalcProto::TUnsplitted message; + message.SetAInt(-13); + message.SetAUint(47); + message.SetAString("first message"); + message.SetABool(true); + + input->OnObject(&message); + UNIT_ASSERT(queue0.size() == 1 && queue1.empty() && queue2.empty() && finalValue == 0); + + message.SetABool(false); + message.SetAString("second message"); + + input->OnObject(&message); + UNIT_ASSERT(queue0.size() == 1 && queue1.size() == 1 && queue2.empty() && finalValue == 0); + + message.ClearABool(); + + input->OnObject(&message); + UNIT_ASSERT(queue0.size() == 1 && queue1.size() == 1 && queue2.size() == 1 && finalValue == 0); + + 
input->OnFinish(); + UNIT_ASSERT(queue0.size() == 1 && queue1.size() == 1 && queue2.size() == 1 && finalValue == 42); + + TVariantConsumerImpl::TType0 expected0 = {{-13, "first message"}}; + UNIT_ASSERT_EQUAL(queue0, expected0); + + TVariantConsumerImpl::TType1 expected1 = {{47, "second message"}}; + UNIT_ASSERT_EQUAL(queue1, expected1); + + TVariantConsumerImpl::TType2 expected2 = {{"Error"}}; + UNIT_ASSERT_EQUAL(queue2, expected2); + } + + { + auto program1 = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TUnsplitted>(), + TProtobufMultiOutputSpec<NPureCalcProto::TSplitted1, NPureCalcProto::TSplitted2, NPureCalcProto::TStringMessage>( + {}, arenas + ), + sExpr, + ETranslationMode::SExpr + ); + + auto program2 = factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TUnsplitted>(), + TProtobufMultiOutputSpec<NPureCalcProto::TSplitted1, NPureCalcProto::TSplitted2, NPureCalcProto::TStringMessage>( + {}, arenas + ), + sExpr, + ETranslationMode::SExpr + ); + + auto input1 = MakeHolder<TUnsplittedStreamImpl>(); + auto output1 = program1->Apply(std::move(input1)); + + auto input2 = MakeHolder<TUnsplittedStreamImpl>(); + auto output2 = program2->Apply(std::move(input2)); + + decltype(output1->Fetch()) variant1; + decltype(output2->Fetch()) variant2; + +#define ASSERT_EQUAL_FIELDS(X1, X2, I, F, E) \ + UNIT_ASSERT_EQUAL(X1.index(), I); \ + UNIT_ASSERT_EQUAL(X2.index(), I); \ + UNIT_ASSERT_EQUAL(std::get<I>(X1)->Get##F(), E); \ + UNIT_ASSERT_EQUAL(std::get<I>(X2)->Get##F(), E) + + variant1 = output1->Fetch(); + variant2 = output2->Fetch(); + ASSERT_EQUAL_FIELDS(variant1, variant2, 2, X, "Error"); + ASSERT_EQUAL_FIELDS(variant1, variant2, 2, Arena, (arenas.Defined() ? 
arenas->at(2) : nullptr)); + + variant1 = output1->Fetch(); + variant2 = output2->Fetch(); + ASSERT_EQUAL_FIELDS(variant1, variant2, 1, CUint, 111); + ASSERT_EQUAL_FIELDS(variant1, variant2, 1, CString, "Hello!"); + ASSERT_EQUAL_FIELDS(variant1, variant2, 1, Arena, (arenas.Defined() ? arenas->at(1) : nullptr)); + + variant1 = output1->Fetch(); + variant2 = output2->Fetch(); + ASSERT_EQUAL_FIELDS(variant1, variant2, 0, BInt, -23); + ASSERT_EQUAL_FIELDS(variant1, variant2, 0, BString, "Hello!"); + ASSERT_EQUAL_FIELDS(variant1, variant2, 0, Arena, (arenas.Defined() ? arenas->at(0) : nullptr)); + + variant1 = output1->Fetch(); + variant2 = output2->Fetch(); + UNIT_ASSERT_EQUAL(variant1.index(), 0); + UNIT_ASSERT_EQUAL(variant2.index(), 0); + UNIT_ASSERT_EQUAL(std::get<0>(variant1), nullptr); + UNIT_ASSERT_EQUAL(std::get<0>(variant1), nullptr); + +#undef ASSERT_EQUAL_FIELDS + } + } + + Y_UNIT_TEST(TestMultiOutputs) { + CheckMultiOutputs(Nothing()); + } + + Y_UNIT_TEST(TestSupportedTypes) { + + } + + Y_UNIT_TEST(TestProtobufArena) { + { + NPureCalcProto::TNestedRepeated input; + input.SetX(20); + { + auto* item = input.MutableY()->Add(); + item->SetX(100); + { + auto* y = item->MutableY(); + *y = GetCanonicalMessage(); + y->SetFUint64(1000); + } + } + { + auto* item = input.MutableY()->Add(); + item->SetX(200); + { + auto* y = item->MutableY(); + *y = GetCanonicalMessage(); + y->SetFUint64(2000); + } + } + + google::protobuf::Arena arena; + CheckPassThroughYql(input, &arena); + } + + { + google::protobuf::Arena arena1; + google::protobuf::Arena arena2; + TVector<google::protobuf::Arena*> arenas{&arena1, &arena2, &arena1}; + CheckMultiOutputs(arenas); + } + } + + Y_UNIT_TEST(TestFieldRenames) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + TString query = "SELECT InputAlias AS OutputAlias FROM Input"; + + auto inputProtoOptions = TProtoSchemaOptions(); + inputProtoOptions.SetFieldRenames({{"X", "InputAlias"}}); + + auto inputSpec = 
TProtobufInputSpec<NPureCalcProto::TSimpleMessage>( + Nothing(), std::move(inputProtoOptions) + ); + + auto outputProtoOptions = TProtoSchemaOptions(); + outputProtoOptions.SetFieldRenames({{"X", "OutputAlias"}}); + + auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>( + std::move(outputProtoOptions) + ); + + { + auto program = factory->MakePullStreamProgram( + inputSpec, outputSpec, query, ETranslationMode::SQL + ); + + auto input = MakeHolder<TSimpleMessageStreamImpl>(1); + auto output = program->Apply(std::move(input)); + + TVector<int> expected = {1}; + TVector<int> actual; + + while (auto* x = output->Fetch()) { + actual.push_back(x->GetX()); + } + + UNIT_ASSERT_VALUES_EQUAL(expected, actual); + } + + { + auto program = factory->MakePullListProgram( + inputSpec, outputSpec, query, ETranslationMode::SQL + ); + + auto input = MakeHolder<TSimpleMessageStreamImpl>(1); + auto output = program->Apply(std::move(input)); + + TVector<int> expected = {1}; + TVector<int> actual; + + while (auto* x = output->Fetch()) { + actual.push_back(x->GetX()); + } + + UNIT_ASSERT_VALUES_EQUAL(expected, actual); + } + + { + auto program = factory->MakePushStreamProgram( + inputSpec, outputSpec, query, ETranslationMode::SQL + ); + + TVector<int> expected = {1, -100}; + TVector<int> actual; + + auto consumer = MakeHolder<TSimpleMessageConsumerImpl>(&actual); + auto input = program->Apply(std::move(consumer)); + + NPureCalcProto::TSimpleMessage Message; + + Message.SetX(1); + input->OnObject(&Message); + + input->OnFinish(); + + UNIT_ASSERT_VALUES_EQUAL(expected, actual); + } + } +} diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/ya.make b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/ya.make new file mode 100644 index 0000000000..ef457d0548 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ut/ya.make @@ -0,0 +1,19 @@ +UNITTEST() + +PEERDIR( + library/cpp/protobuf/util + ydb/library/yql/public/udf/service/exception_policy + 
ydb/library/yql/public/purecalc/common + ydb/library/yql/public/purecalc/io_specs/protobuf + ydb/library/yql/public/purecalc/ut/protos +) + +SIZE(MEDIUM) + +YQL_LAST_ABI_VERSION() + +SRCS( + test_spec.cpp +) + +END() diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf/ya.make b/ydb/library/yql/public/purecalc/io_specs/protobuf/ya.make new file mode 100644 index 0000000000..a9efad989f --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf/ya.make @@ -0,0 +1,21 @@ +LIBRARY() + +PEERDIR( + ydb/library/yql/public/purecalc/common + ydb/library/yql/public/purecalc/io_specs/protobuf_raw +) + +SRCS( + spec.cpp + proto_variant.cpp +) + + + YQL_LAST_ABI_VERSION() + + +END() + +RECURSE_FOR_TESTS( + ut +) diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..e482710c07 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,23 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(purecalc-io_specs-protobuf_raw) +target_compile_options(purecalc-io_specs-protobuf_raw PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-io_specs-protobuf_raw PUBLIC + contrib-libs-cxxsupp + yutil + public-purecalc-common + purecalc-helpers-protobuf +) +target_sources(purecalc-io_specs-protobuf_raw PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp +) diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..eb794e6f37 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.linux-aarch64.txt @@ -0,0 +1,24 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(purecalc-io_specs-protobuf_raw) +target_compile_options(purecalc-io_specs-protobuf_raw PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-io_specs-protobuf_raw PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + public-purecalc-common + purecalc-helpers-protobuf +) +target_sources(purecalc-io_specs-protobuf_raw PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp +) diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..eb794e6f37 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.linux-x86_64.txt @@ -0,0 +1,24 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(purecalc-io_specs-protobuf_raw) +target_compile_options(purecalc-io_specs-protobuf_raw PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-io_specs-protobuf_raw PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + public-purecalc-common + purecalc-helpers-protobuf +) +target_sources(purecalc-io_specs-protobuf_raw PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp +) diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..e482710c07 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/CMakeLists.windows-x86_64.txt @@ -0,0 +1,23 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(purecalc-io_specs-protobuf_raw) +target_compile_options(purecalc-io_specs-protobuf_raw PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(purecalc-io_specs-protobuf_raw PUBLIC + contrib-libs-cxxsupp + yutil + public-purecalc-common + purecalc-helpers-protobuf +) +target_sources(purecalc-io_specs-protobuf_raw PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp +) diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp new file mode 100644 index 0000000000..95adbc4de9 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.cpp @@ -0,0 +1 @@ +#include "proto_holder.h" diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.h b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.h new file mode 100644 index 0000000000..7d4d843bfc --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/proto_holder.h @@ -0,0 +1,31 @@ +#pragma once + +#include <google/protobuf/arena.h> + +#include <util/generic/ptr.h> + +#include <type_traits> + +namespace NYql::NPureCalc { + class TProtoDestroyer { + public: + template <typename T> + static inline void Destroy(T* t) noexcept { + if (t->GetArena() == nullptr) { + CheckedDelete(t); + } + } + }; + + template <typename TProto> + concept IsProtoMessage = std::is_base_of_v<NProtoBuf::Message, TProto>; + + template <IsProtoMessage TProto> + using TProtoHolder = THolder<TProto, TProtoDestroyer>; + + template <IsProtoMessage TProto, typename... TArgs> + TProtoHolder<TProto> MakeProtoHolder(NProtoBuf::Arena* arena, TArgs&&... 
args) { + auto* ptr = NProtoBuf::Arena::CreateMessage<TProto>(arena, std::forward<TArgs>(args)...); + return TProtoHolder<TProto>(ptr); + } +} diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp new file mode 100644 index 0000000000..8a6f71c5b3 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.cpp @@ -0,0 +1,1064 @@ +#include "proto_holder.h" +#include "spec.h" + +#include <ydb/library/yql/public/udf/udf_value.h> +#include <ydb/library/yql/minikql/computation/mkql_computation_node_holders.h> +#include <ydb/library/yql/minikql/computation/mkql_custom_list.h> +#include <ydb/library/yql/minikql/mkql_string_util.h> +#include <ydb/library/yql/utils/yql_panic.h> +#include <google/protobuf/reflection.h> + +using namespace NYql; +using namespace NYql::NPureCalc; +using namespace google::protobuf; +using namespace NKikimr::NUdf; +using namespace NKikimr::NMiniKQL; + +TProtobufRawInputSpec::TProtobufRawInputSpec( + const Descriptor& descriptor, + const TMaybe<TString>& timestampColumn, + const TProtoSchemaOptions& options +) + : Descriptor_(descriptor) + , TimestampColumn_(timestampColumn) + , SchemaOptions_(options) +{ +} + +const TVector<NYT::TNode>& TProtobufRawInputSpec::GetSchemas() const { + if (SavedSchemas_.size() == 0) { + SavedSchemas_.push_back(MakeSchemaFromProto(Descriptor_, SchemaOptions_)); + if (TimestampColumn_) { + auto timestampType = NYT::TNode::CreateList(); + timestampType.Add("DataType"); + timestampType.Add("Uint64"); + auto timestamp = NYT::TNode::CreateList(); + timestamp.Add(*TimestampColumn_); + timestamp.Add(timestampType); + SavedSchemas_.back().AsList()[1].AsList().push_back(timestamp); + } + } + + return SavedSchemas_; +} + +const Descriptor& TProtobufRawInputSpec::GetDescriptor() const { + return Descriptor_; +} + +const TMaybe<TString>& TProtobufRawInputSpec::GetTimestampColumn() const { + return TimestampColumn_; +} + +const 
TProtoSchemaOptions& TProtobufRawInputSpec::GetSchemaOptions() const { + return SchemaOptions_; +} + +TProtobufRawOutputSpec::TProtobufRawOutputSpec( + const Descriptor& descriptor, + MessageFactory* factory, + const TProtoSchemaOptions& options, + Arena* arena +) + : Descriptor_(descriptor) + , Factory_(factory) + , SchemaOptions_(options) + , Arena_(arena) +{ + SchemaOptions_.ListIsOptional = true; +} + +const NYT::TNode& TProtobufRawOutputSpec::GetSchema() const { + if (!SavedSchema_) { + SavedSchema_ = MakeSchemaFromProto(Descriptor_, SchemaOptions_); + } + + return SavedSchema_.GetRef(); +} + +const Descriptor& TProtobufRawOutputSpec::GetDescriptor() const { + return Descriptor_; +} + +void TProtobufRawOutputSpec::SetFactory(MessageFactory* factory) { + Factory_ = factory; +} + +MessageFactory* TProtobufRawOutputSpec::GetFactory() const { + return Factory_; +} + +void TProtobufRawOutputSpec::SetArena(Arena* arena) { + Arena_ = arena; +} + +Arena* TProtobufRawOutputSpec::GetArena() const { + return Arena_; +} + +const TProtoSchemaOptions& TProtobufRawOutputSpec::GetSchemaOptions() const { + return SchemaOptions_; +} + +TProtobufRawMultiOutputSpec::TProtobufRawMultiOutputSpec( + TVector<const Descriptor*> descriptors, + TMaybe<TVector<MessageFactory*>> factories, + const TProtoSchemaOptions& options, + TMaybe<TVector<Arena*>> arenas +) + : Descriptors_(std::move(descriptors)) + , SchemaOptions_(options) +{ + if (factories) { + Y_ENSURE(factories->size() == Descriptors_.size(), "number of factories must match number of descriptors"); + Factories_ = std::move(*factories); + } else { + Factories_ = TVector<MessageFactory*>(Descriptors_.size(), nullptr); + } + + if (arenas) { + Y_ENSURE(arenas->size() == Descriptors_.size(), "number of arenas must match number of descriptors"); + Arenas_ = std::move(*arenas); + } else { + Arenas_ = TVector<Arena*>(Descriptors_.size(), nullptr); + } +} + +const NYT::TNode& TProtobufRawMultiOutputSpec::GetSchema() const { + if 
(SavedSchema_.IsUndefined()) { + SavedSchema_ = MakeVariantSchemaFromProtos(Descriptors_, SchemaOptions_); + } + + return SavedSchema_; +} + +const Descriptor& TProtobufRawMultiOutputSpec::GetDescriptor(ui32 index) const { + Y_ENSURE(index < Descriptors_.size(), "invalid output index"); + + return *Descriptors_[index]; +} + +void TProtobufRawMultiOutputSpec::SetFactory(ui32 index, MessageFactory* factory) { + Y_ENSURE(index < Factories_.size(), "invalid output index"); + + Factories_[index] = factory; +} + +MessageFactory* TProtobufRawMultiOutputSpec::GetFactory(ui32 index) const { + Y_ENSURE(index < Factories_.size(), "invalid output index"); + + return Factories_[index]; +} + +void TProtobufRawMultiOutputSpec::SetArena(ui32 index, Arena* arena) { + Y_ENSURE(index < Arenas_.size(), "invalid output index"); + + Arenas_[index] = arena; +} + +Arena* TProtobufRawMultiOutputSpec::GetArena(ui32 index) const { + Y_ENSURE(index < Arenas_.size(), "invalid output index"); + + return Arenas_[index]; +} + +ui32 TProtobufRawMultiOutputSpec::GetOutputsNumber() const { + return static_cast<ui32>(Descriptors_.size()); +} + +const TProtoSchemaOptions& TProtobufRawMultiOutputSpec::GetSchemaOptions() const { + return SchemaOptions_; +} + +namespace { + struct TFieldMapping { + TString Name; + const FieldDescriptor* Field; + TVector<TFieldMapping> NestedFields; + }; + + /** + * Fills a tree of field mappings from the given yql struct type to protobuf message. + * + * @param fromType source yql type. + * @param toType target protobuf message type. + * @param mappings destination vector will be filled with field descriptors. Order of descriptors will match + * the order of field names. 
+ */ + void FillFieldMappings( + const TStructType* fromType, + const Descriptor& toType, + TVector<TFieldMapping>& mappings, + const TMaybe<TString>& timestampColumn, + bool listIsOptional, + const THashMap<TString, TString>& fieldRenames + ) { + THashMap<TString, TString> inverseFieldRenames; + + for (const auto& [source, target]: fieldRenames) { + auto [iterator, emplaced] = inverseFieldRenames.emplace(target, source); + Y_ENSURE(emplaced, "Duplicate rename field found: " << source << " -> " << target); + } + + mappings.resize(fromType->GetMembersCount()); + for (ui32 i = 0; i < fromType->GetMembersCount(); ++i) { + TString fieldName(fromType->GetMemberName(i)); + if (auto fieldRenamePtr = inverseFieldRenames.FindPtr(fieldName)) { + fieldName = *fieldRenamePtr; + } + + mappings[i].Name = fieldName; + mappings[i].Field = toType.FindFieldByName(fieldName); + YQL_ENSURE( + mappings[i].Field || timestampColumn && *timestampColumn == fieldName, + "Missing field: " << fieldName); + + const auto* fieldType = fromType->GetMemberType(i); + if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::List) { + const auto* listType = static_cast<const NKikimr::NMiniKQL::TListType*>(fieldType); + fieldType = listType->GetItemType(); + } else if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Optional) { + const auto* optionalType = static_cast<const NKikimr::NMiniKQL::TOptionalType*>(fieldType); + fieldType = optionalType->GetItemType(); + + if (listIsOptional) { + if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::List) { + const auto* listType = static_cast<const NKikimr::NMiniKQL::TListType*>(fieldType); + fieldType = listType->GetItemType(); + } + } + } + YQL_ENSURE(fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Struct || + fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Data, + "unsupported field kind [" << fieldType->GetKindAsStr() << "], field [" << fieldName << "]"); + if (fieldType->GetKind() == 
NKikimr::NMiniKQL::TType::EKind::Struct) { + FillFieldMappings(static_cast<const NKikimr::NMiniKQL::TStructType*>(fieldType), + *mappings[i].Field->message_type(), + mappings[i].NestedFields, Nothing(), listIsOptional, {}); + } + } + } + + /** + * Extract field values from the given protobuf message into an array of unboxed values. + * + * @param factory to create nested unboxed values. + * @param source source protobuf message. + * @param destination destination array of unboxed values. Each element in the array corresponds to a field + * in the protobuf message. + * @param mappings vector of protobuf field descriptors which denotes relation between fields of the + * source message and elements of the destination array. + * @param scratch temporary string which will be used during conversion. + */ + void FillInputValue( + const THolderFactory& factory, + const Message* source, + TUnboxedValue* destination, + const TVector<TFieldMapping>& mappings, + const TMaybe<TString>& timestampColumn, + ITimeProvider* timeProvider, + EEnumPolicy enumPolicy + ) { + TString scratch; + auto reflection = source->GetReflection(); + for (ui32 i = 0; i < mappings.size(); ++i) { + auto mapping = mappings[i]; + if (!mapping.Field) { + YQL_ENSURE(timestampColumn && mapping.Name == *timestampColumn); + destination[i] = TUnboxedValuePod(timeProvider->Now().MicroSeconds()); + continue; + } + + const auto type = mapping.Field->type(); + if (mapping.Field->label() == FieldDescriptor::LABEL_REPEATED) { + const auto size = static_cast<ui32>(reflection->FieldSize(*source, mapping.Field)); + if (size == 0) { + destination[i] = factory.GetEmptyContainer(); + } else { + TUnboxedValue* inplace = nullptr; + destination[i] = factory.CreateDirectArrayHolder(size, inplace); + for (ui32 j = 0; j < size; ++j) { + switch (type) { + case FieldDescriptor::TYPE_DOUBLE: + inplace[j] = TUnboxedValuePod(reflection->GetRepeatedDouble(*source, mapping.Field, j)); + break; + + case FieldDescriptor::TYPE_FLOAT: + 
inplace[j] = TUnboxedValuePod(reflection->GetRepeatedFloat(*source, mapping.Field, j)); + break; + + case FieldDescriptor::TYPE_INT64: + case FieldDescriptor::TYPE_SFIXED64: + case FieldDescriptor::TYPE_SINT64: + inplace[j] = TUnboxedValuePod(reflection->GetRepeatedInt64(*source, mapping.Field, j)); + break; + + case FieldDescriptor::TYPE_ENUM: + switch (EnumFormatType(*mapping.Field, enumPolicy)) { + case EEnumFormatType::Int32: + inplace[j] = TUnboxedValuePod(reflection->GetRepeatedEnumValue(*source, mapping.Field, j)); + break; + case EEnumFormatType::String: + inplace[j] = MakeString(reflection->GetRepeatedEnum(*source, mapping.Field, j)->name()); + break; + } + break; + + case FieldDescriptor::TYPE_UINT64: + case FieldDescriptor::TYPE_FIXED64: + inplace[j] = TUnboxedValuePod(reflection->GetRepeatedUInt64(*source, mapping.Field, j)); + break; + + case FieldDescriptor::TYPE_INT32: + case FieldDescriptor::TYPE_SFIXED32: + case FieldDescriptor::TYPE_SINT32: + inplace[j] = TUnboxedValuePod(reflection->GetRepeatedInt32(*source, mapping.Field, j)); + break; + + case FieldDescriptor::TYPE_UINT32: + case FieldDescriptor::TYPE_FIXED32: + inplace[j] = TUnboxedValuePod(reflection->GetRepeatedUInt32(*source, mapping.Field, j)); + break; + + case FieldDescriptor::TYPE_BOOL: + inplace[j] = TUnboxedValuePod(reflection->GetRepeatedBool(*source, mapping.Field, j)); + break; + + case FieldDescriptor::TYPE_STRING: + inplace[j] = MakeString(reflection->GetRepeatedStringReference(*source, mapping.Field, j, &scratch)); + break; + + case FieldDescriptor::TYPE_BYTES: + inplace[j] = MakeString(reflection->GetRepeatedStringReference(*source, mapping.Field, j, &scratch)); + break; + + case FieldDescriptor::TYPE_MESSAGE: + { + const Message& nestedMessage = reflection->GetRepeatedMessage(*source, mapping.Field, j); + TUnboxedValue* nestedValues = nullptr; + inplace[j] = factory.CreateDirectArrayHolder(static_cast<ui32>(mapping.NestedFields.size()), + nestedValues); + 
FillInputValue(factory, &nestedMessage, nestedValues, mapping.NestedFields, Nothing(), timeProvider, enumPolicy); + } + break; + + default: + ythrow yexception() << "Unsupported protobuf type: " << mapping.Field->type_name() << ", field: " << mapping.Field->name(); + } + } + } + } else { + if (!reflection->HasField(*source, mapping.Field)) { + continue; + } + + switch (type) { + case FieldDescriptor::TYPE_DOUBLE: + destination[i] = TUnboxedValuePod(reflection->GetDouble(*source, mapping.Field)); + break; + + case FieldDescriptor::TYPE_FLOAT: + destination[i] = TUnboxedValuePod(reflection->GetFloat(*source, mapping.Field)); + break; + + case FieldDescriptor::TYPE_INT64: + case FieldDescriptor::TYPE_SFIXED64: + case FieldDescriptor::TYPE_SINT64: + destination[i] = TUnboxedValuePod(reflection->GetInt64(*source, mapping.Field)); + break; + + case FieldDescriptor::TYPE_ENUM: + switch (EnumFormatType(*mapping.Field, enumPolicy)) { + case EEnumFormatType::Int32: + destination[i] = TUnboxedValuePod(reflection->GetEnumValue(*source, mapping.Field)); + break; + case EEnumFormatType::String: + destination[i] = MakeString(reflection->GetEnum(*source, mapping.Field)->name()); + break; + } + break; + + case FieldDescriptor::TYPE_UINT64: + case FieldDescriptor::TYPE_FIXED64: + destination[i] = TUnboxedValuePod(reflection->GetUInt64(*source, mapping.Field)); + break; + + case FieldDescriptor::TYPE_INT32: + case FieldDescriptor::TYPE_SFIXED32: + case FieldDescriptor::TYPE_SINT32: + destination[i] = TUnboxedValuePod(reflection->GetInt32(*source, mapping.Field)); + break; + + case FieldDescriptor::TYPE_UINT32: + case FieldDescriptor::TYPE_FIXED32: + destination[i] = TUnboxedValuePod(reflection->GetUInt32(*source, mapping.Field)); + break; + + case FieldDescriptor::TYPE_BOOL: + destination[i] = TUnboxedValuePod(reflection->GetBool(*source, mapping.Field)); + break; + + case FieldDescriptor::TYPE_STRING: + destination[i] = MakeString(reflection->GetStringReference(*source, 
mapping.Field, &scratch)); + break; + + case FieldDescriptor::TYPE_BYTES: + destination[i] = MakeString(reflection->GetStringReference(*source, mapping.Field, &scratch)); + break; + case FieldDescriptor::TYPE_MESSAGE: + { + const Message& nestedMessage = reflection->GetMessage(*source, mapping.Field); + TUnboxedValue* nestedValues = nullptr; + destination[i] = factory.CreateDirectArrayHolder(static_cast<ui32>(mapping.NestedFields.size()), + nestedValues); + FillInputValue(factory, &nestedMessage, nestedValues, mapping.NestedFields, Nothing(), timeProvider, enumPolicy); + } + break; + + default: + ythrow yexception() << "Unsupported protobuf type: " << mapping.Field->type_name() + << ", field: " << mapping.Field->name(); + } + } + } + } + + + /** + * Convert unboxed value to protobuf. + * + * @param source unboxed value to extract data from. Type of the value should be struct. It's UB to pass + * a non-struct value here. + * @param destination destination message. Data in this message will be overwritten + * by data from unboxed value. + * @param mappings vector of protobuf field descriptors which denotes relation between struct fields + * and message fields. For any i-th element of this vector, type of the i-th element of + * the unboxed structure must match type of the field pointed by descriptor. Size of this + * vector should match the number of fields in the struct. 
+ */ + void FillOutputMessage( + const TUnboxedValue& source, + Message* destination, + const TVector<TFieldMapping>& mappings, + EEnumPolicy enumPolicy + ) { + auto reflection = destination->GetReflection(); + for (ui32 i = 0; i < mappings.size(); ++i) { + const auto& mapping = mappings[i]; + const auto& cell = source.GetElement(i); + if (!cell) { + reflection->ClearField(destination, mapping.Field); + continue; + } + const auto type = mapping.Field->type(); + if (mapping.Field->label() == FieldDescriptor::LABEL_REPEATED) { + const auto iter = cell.GetListIterator(); + reflection->ClearField(destination, mapping.Field); + for (TUnboxedValue item; iter.Next(item);) { + switch (mapping.Field->type()) { + case FieldDescriptor::TYPE_DOUBLE: + reflection->AddDouble(destination, mapping.Field, item.Get<double>()); + break; + + case FieldDescriptor::TYPE_FLOAT: + reflection->AddFloat(destination, mapping.Field, item.Get<float>()); + break; + + case FieldDescriptor::TYPE_INT64: + case FieldDescriptor::TYPE_SFIXED64: + case FieldDescriptor::TYPE_SINT64: + reflection->AddInt64(destination, mapping.Field, item.Get<i64>()); + break; + + case FieldDescriptor::TYPE_ENUM: { + switch (EnumFormatType(*mapping.Field, enumPolicy)) { + case EEnumFormatType::Int32: + reflection->AddEnumValue(destination, mapping.Field, item.Get<i32>()); + break; + case EEnumFormatType::String: { + auto enumValueDescriptor = mapping.Field->enum_type()->FindValueByName(TString(item.AsStringRef())); + if (!enumValueDescriptor) { + enumValueDescriptor = mapping.Field->default_value_enum(); + } + reflection->AddEnum(destination, mapping.Field, enumValueDescriptor); + break; + } + } + break; + } + + case FieldDescriptor::TYPE_UINT64: + case FieldDescriptor::TYPE_FIXED64: + reflection->AddUInt64(destination, mapping.Field, item.Get<ui64>()); + break; + + case FieldDescriptor::TYPE_INT32: + case FieldDescriptor::TYPE_SFIXED32: + case FieldDescriptor::TYPE_SINT32: + reflection->AddInt32(destination, 
mapping.Field, item.Get<i32>()); + break; + + case FieldDescriptor::TYPE_UINT32: + case FieldDescriptor::TYPE_FIXED32: + reflection->AddUInt32(destination, mapping.Field, item.Get<ui32>()); + break; + + case FieldDescriptor::TYPE_BOOL: + reflection->AddBool(destination, mapping.Field, item.Get<bool>()); + break; + + case FieldDescriptor::TYPE_STRING: + reflection->AddString(destination, mapping.Field, TString(item.AsStringRef())); + break; + + case FieldDescriptor::TYPE_BYTES: + reflection->AddString(destination, mapping.Field, TString(item.AsStringRef())); + break; + + case FieldDescriptor::TYPE_MESSAGE: + { + auto* nestedMessage = reflection->AddMessage(destination, mapping.Field); + FillOutputMessage(item, nestedMessage, mapping.NestedFields, enumPolicy); + } + break; + + default: + ythrow yexception() << "Unsupported protobuf type: " + << mapping.Field->type_name() << ", field: " << mapping.Field->name(); + } + } + } else { + switch (type) { + case FieldDescriptor::TYPE_DOUBLE: + reflection->SetDouble(destination, mapping.Field, cell.Get<double>()); + break; + + case FieldDescriptor::TYPE_FLOAT: + reflection->SetFloat(destination, mapping.Field, cell.Get<float>()); + break; + + case FieldDescriptor::TYPE_INT64: + case FieldDescriptor::TYPE_SFIXED64: + case FieldDescriptor::TYPE_SINT64: + reflection->SetInt64(destination, mapping.Field, cell.Get<i64>()); + break; + + case FieldDescriptor::TYPE_ENUM: { + switch (EnumFormatType(*mapping.Field, enumPolicy)) { + case EEnumFormatType::Int32: + reflection->SetEnumValue(destination, mapping.Field, cell.Get<i32>()); + break; + case EEnumFormatType::String: { + auto enumValueDescriptor = mapping.Field->enum_type()->FindValueByName(TString(cell.AsStringRef())); + if (!enumValueDescriptor) { + enumValueDescriptor = mapping.Field->default_value_enum(); + } + reflection->SetEnum(destination, mapping.Field, enumValueDescriptor); + break; + } + } + break; + } + + case FieldDescriptor::TYPE_UINT64: + case 
FieldDescriptor::TYPE_FIXED64: + reflection->SetUInt64(destination, mapping.Field, cell.Get<ui64>()); + break; + + case FieldDescriptor::TYPE_INT32: + case FieldDescriptor::TYPE_SFIXED32: + case FieldDescriptor::TYPE_SINT32: + reflection->SetInt32(destination, mapping.Field, cell.Get<i32>()); + break; + + case FieldDescriptor::TYPE_UINT32: + case FieldDescriptor::TYPE_FIXED32: + reflection->SetUInt32(destination, mapping.Field, cell.Get<ui32>()); + break; + + case FieldDescriptor::TYPE_BOOL: + reflection->SetBool(destination, mapping.Field, cell.Get<bool>()); + break; + + case FieldDescriptor::TYPE_STRING: + reflection->SetString(destination, mapping.Field, TString(cell.AsStringRef())); + break; + + case FieldDescriptor::TYPE_BYTES: + reflection->SetString(destination, mapping.Field, TString(cell.AsStringRef())); + break; + + case FieldDescriptor::TYPE_MESSAGE: + { + auto* nestedMessage = reflection->MutableMessage(destination, mapping.Field); + FillOutputMessage(cell, nestedMessage, mapping.NestedFields, enumPolicy); + } + break; + + default: + ythrow yexception() << "Unsupported protobuf type: " + << mapping.Field->type_name() << ", field: " << mapping.Field->name(); + } + } + } + } + + /** + * Converts input messages to unboxed values. 
+ */ + class TInputConverter { + protected: + IWorker* Worker_; + TVector<TFieldMapping> Mappings_; + TPlainContainerCache Cache_; + TMaybe<TString> TimestampColumn_; + EEnumPolicy EnumPolicy_ = EEnumPolicy::Int32; + + public: + explicit TInputConverter(const TProtobufRawInputSpec& inputSpec, IWorker* worker) + : Worker_(worker) + , TimestampColumn_(inputSpec.GetTimestampColumn()) + , EnumPolicy_(inputSpec.GetSchemaOptions().EnumPolicy) + { + FillFieldMappings( + Worker_->GetInputType(), inputSpec.GetDescriptor(), + Mappings_, TimestampColumn_, + inputSpec.GetSchemaOptions().ListIsOptional, + inputSpec.GetSchemaOptions().FieldRenames + ); + } + + public: + void DoConvert(const Message* message, TUnboxedValue& result) { + auto& holderFactory = Worker_->GetGraph().GetHolderFactory(); + TUnboxedValue* items = nullptr; + result = Cache_.NewArray(holderFactory, static_cast<ui32>(Mappings_.size()), items); + FillInputValue(holderFactory, message, items, Mappings_, TimestampColumn_, Worker_->GetTimeProvider(), EnumPolicy_); + } + + void ClearCache() { + Cache_.Clear(); + } + }; + + template <typename TOutputSpec> + using OutputItemType = typename TOutputSpecTraits<TOutputSpec>::TOutputItemType; + + template <typename TOutputSpec> + class TOutputConverter; + + /** + * Converts unboxed values to output messages (single-output program case). 
+ */ + template <> + class TOutputConverter<TProtobufRawOutputSpec> { + protected: + IWorker* Worker_; + TVector<TFieldMapping> OutputColumns_; + TProtoHolder<Message> Message_; + EEnumPolicy EnumPolicy_ = EEnumPolicy::Int32; + + public: + explicit TOutputConverter(const TProtobufRawOutputSpec& outputSpec, IWorker* worker) + : Worker_(worker) + , EnumPolicy_(outputSpec.GetSchemaOptions().EnumPolicy) + { + if (!Worker_->GetOutputType()->IsStruct()) { + ythrow yexception() << "protobuf output spec does not support multiple outputs"; + } + + FillFieldMappings( + static_cast<const NKikimr::NMiniKQL::TStructType*>(Worker_->GetOutputType()), + outputSpec.GetDescriptor(), + OutputColumns_, + Nothing(), + outputSpec.GetSchemaOptions().ListIsOptional, + outputSpec.GetSchemaOptions().FieldRenames + ); + + auto* factory = outputSpec.GetFactory(); + + if (!factory) { + factory = MessageFactory::generated_factory(); + } + + Message_.Reset(factory->GetPrototype(&outputSpec.GetDescriptor())->New(outputSpec.GetArena())); + } + + OutputItemType<TProtobufRawOutputSpec> DoConvert(TUnboxedValue value) { + FillOutputMessage(value, Message_.Get(), OutputColumns_, EnumPolicy_); + return Message_.Get(); + } + }; + + /* + * Converts unboxed values to output type (multi-output programs case). 
+ */ + template <> + class TOutputConverter<TProtobufRawMultiOutputSpec> { + protected: + IWorker* Worker_; + TVector<TVector<TFieldMapping>> OutputColumns_; + TVector<TProtoHolder<Message>> Messages_; + EEnumPolicy EnumPolicy_ = EEnumPolicy::Int32; + + public: + explicit TOutputConverter(const TProtobufRawMultiOutputSpec& outputSpec, IWorker* worker) + : Worker_(worker) + , EnumPolicy_(outputSpec.GetSchemaOptions().EnumPolicy) + { + const auto* outputType = Worker_->GetOutputType(); + Y_ENSURE(outputType->IsVariant(), "protobuf multi-output spec requires multi-output program"); + const auto* variantType = static_cast<const NKikimr::NMiniKQL::TVariantType*>(outputType); + Y_ENSURE( + variantType->GetUnderlyingType()->IsTuple(), + "protobuf multi-output spec requires variant over tuple as program output type" + ); + Y_ENSURE( + outputSpec.GetOutputsNumber() == variantType->GetAlternativesCount(), + "number of outputs provided by spec does not match number of variant alternatives" + ); + + auto defaultFactory = MessageFactory::generated_factory(); + + for (ui32 i = 0; i < variantType->GetAlternativesCount(); ++i) { + const auto* type = variantType->GetAlternativeType(i); + Y_ASSERT(type->IsStruct()); + Y_ASSERT(OutputColumns_.size() == i && Messages_.size() == i); + + OutputColumns_.push_back({}); + + FillFieldMappings( + static_cast<const NKikimr::NMiniKQL::TStructType*>(type), + outputSpec.GetDescriptor(i), + OutputColumns_.back(), + Nothing(), + outputSpec.GetSchemaOptions().ListIsOptional, + {} + ); + + auto factory = outputSpec.GetFactory(i); + if (!factory) { + factory = defaultFactory; + } + + Messages_.push_back(TProtoHolder<Message>( + factory->GetPrototype(&outputSpec.GetDescriptor(i))->New(outputSpec.GetArena(i)) + )); + } + } + + OutputItemType<TProtobufRawMultiOutputSpec> DoConvert(TUnboxedValue value) { + auto index = value.GetVariantIndex(); + auto msgPtr = Messages_[index].Get(); + FillOutputMessage(value.GetVariantItem(), msgPtr, 
OutputColumns_[index], EnumPolicy_); + return {index, msgPtr}; + } + }; + + /** + * List (or, better, stream) of unboxed values. Used as an input value in pull workers. + */ + class TProtoListValue final: public TCustomListValue { + private: + mutable bool HasIterator_ = false; + THolder<IStream<Message*>> Underlying_; + TInputConverter Converter_; + IWorker* Worker_; + TScopedAlloc& ScopedAlloc_; + + public: + TProtoListValue( + TMemoryUsageInfo* memInfo, + const TProtobufRawInputSpec& inputSpec, + THolder<IStream<Message*>> underlying, + IWorker* worker + ) + : TCustomListValue(memInfo) + , Underlying_(std::move(underlying)) + , Converter_(inputSpec, worker) + , Worker_(worker) + , ScopedAlloc_(Worker_->GetScopedAlloc()) + { + } + + ~TProtoListValue() override { + { + // This list value stored in the worker's computation graph and destroyed upon the computation + // graph's destruction. This brings us to an interesting situation: scoped alloc is acquired, + // worker and computation graph are half-way destroyed, and now it's our turn to die. The problem is, + // the underlying stream may own another worker. This happens when chaining programs. Now, to destroy + // that worker correctly, we need to release our scoped alloc (because that worker has its own + // computation graph and scoped alloc). + // By the way, note that we shouldn't interact with the worker here because worker is in the middle of + // its own destruction. So we're using our own reference to the scoped alloc. That reference is alive + // because scoped alloc destroyed after computation graph. 
+ auto unguard = Unguard(ScopedAlloc_); + Underlying_.Destroy(); + } + } + + public: + TUnboxedValue GetListIterator() const override { + YQL_ENSURE(!HasIterator_, "Only one pass over input is supported"); + HasIterator_ = true; + return TUnboxedValuePod(const_cast<TProtoListValue*>(this)); + } + + bool Next(TUnboxedValue& result) override { + const Message* message; + { + auto unguard = Unguard(ScopedAlloc_); + message = Underlying_->Fetch(); + } + + if (!message) { + return false; + } + + Converter_.DoConvert(message, result); + + return true; + } + + EFetchStatus Fetch(TUnboxedValue& result) override { + if (Next(result)) { + return EFetchStatus::Ok; + } else { + return EFetchStatus::Finish; + } + } + }; + + /** + * Consumer which converts messages to unboxed values and relays them to the worker. Used as a return value + * of the push processor's Process function. + */ + class TProtoConsumerImpl final: public IConsumer<Message*> { + private: + TWorkerHolder<IPushStreamWorker> WorkerHolder_; + TInputConverter Converter_; + + public: + explicit TProtoConsumerImpl( + const TProtobufRawInputSpec& inputSpec, + TWorkerHolder<IPushStreamWorker> worker + ) + : WorkerHolder_(std::move(worker)) + , Converter_(inputSpec, WorkerHolder_.Get()) + { + } + + ~TProtoConsumerImpl() override { + with_lock(WorkerHolder_->GetScopedAlloc()) { + Converter_.ClearCache(); + } + } + + public: + void OnObject(Message* message) override { + TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); + + with_lock(WorkerHolder_->GetScopedAlloc()) { + TUnboxedValue result; + Converter_.DoConvert(message, result); + WorkerHolder_->Push(std::move(result)); + } + } + + void OnFinish() override { + TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); + + with_lock(WorkerHolder_->GetScopedAlloc()) { + WorkerHolder_->OnFinish(); + } + } + }; + + /** + * Protobuf input stream for unboxed value streams. 
+ */ + template <typename TOutputSpec> + class TRawProtoStreamImpl final: public IStream<OutputItemType<TOutputSpec>> { + protected: + TWorkerHolder<IPullStreamWorker> WorkerHolder_; + TOutputConverter<TOutputSpec> Converter_; + + public: + explicit TRawProtoStreamImpl(const TOutputSpec& outputSpec, TWorkerHolder<IPullStreamWorker> worker) + : WorkerHolder_(std::move(worker)) + , Converter_(outputSpec, WorkerHolder_.Get()) + { + } + + public: + OutputItemType<TOutputSpec> Fetch() override { + TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); + + with_lock(WorkerHolder_->GetScopedAlloc()) { + TUnboxedValue value; + + auto status = WorkerHolder_->GetOutput().Fetch(value); + + YQL_ENSURE(status != EFetchStatus::Yield, "Yield is not supported in pull mode"); + + if (status == EFetchStatus::Finish) { + return TOutputSpecTraits<TOutputSpec>::StreamSentinel; + } + + return Converter_.DoConvert(value); + } + } + }; + + /** + * Protobuf input stream for unboxed value lists. + */ + template <typename TOutputSpec> + class TRawProtoListImpl final: public IStream<OutputItemType<TOutputSpec>> { + protected: + TWorkerHolder<IPullListWorker> WorkerHolder_; + TOutputConverter<TOutputSpec> Converter_; + + public: + explicit TRawProtoListImpl(const TOutputSpec& outputSpec, TWorkerHolder<IPullListWorker> worker) + : WorkerHolder_(std::move(worker)) + , Converter_(outputSpec, WorkerHolder_.Get()) + { + } + + public: + OutputItemType<TOutputSpec> Fetch() override { + TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); + + with_lock(WorkerHolder_->GetScopedAlloc()) { + TUnboxedValue value; + + if (!WorkerHolder_->GetOutputIterator().Next(value)) { + return TOutputSpecTraits<TOutputSpec>::StreamSentinel; + } + + return Converter_.DoConvert(value); + } + } + }; + + /** + * Push relay used to convert generated unboxed value to a message and push it to the user's consumer. 
+ */ + template <typename TOutputSpec> + class TPushRelayImpl: public IConsumer<const TUnboxedValue*> { + private: + THolder<IConsumer<OutputItemType<TOutputSpec>>> Underlying_; + TOutputConverter<TOutputSpec> Converter_; + IWorker* Worker_; + + public: + TPushRelayImpl( + const TOutputSpec& outputSpec, + IPushStreamWorker* worker, + THolder<IConsumer<OutputItemType<TOutputSpec>>> underlying + ) + : Underlying_(std::move(underlying)) + , Converter_(outputSpec, worker) + , Worker_(worker) + { + } + + // If you've read a comment in the TProtoListValue's destructor, you may be wondering why don't we do the + // same trick here. Well, that's because in push mode, consumer is destroyed before acquiring scoped alloc and + // destroying computation graph. + + public: + void OnObject(const TUnboxedValue* value) override { + OutputItemType<TOutputSpec> message = Converter_.DoConvert(*value); + auto unguard = Unguard(Worker_->GetScopedAlloc()); + Underlying_->OnObject(message); + } + + void OnFinish() override { + auto unguard = Unguard(Worker_->GetScopedAlloc()); + Underlying_->OnFinish(); + } + }; +} + +using ConsumerType = TInputSpecTraits<TProtobufRawInputSpec>::TConsumerType; + +void TInputSpecTraits<TProtobufRawInputSpec>::PreparePullStreamWorker( + const TProtobufRawInputSpec& inputSpec, + IPullStreamWorker* worker, + THolder<IStream<Message*>> stream +) { + with_lock(worker->GetScopedAlloc()) { + worker->SetInput( + worker->GetGraph().GetHolderFactory().Create<TProtoListValue>(inputSpec, std::move(stream), worker), 0); + } +} + +void TInputSpecTraits<TProtobufRawInputSpec>::PreparePullListWorker( + const TProtobufRawInputSpec& inputSpec, + IPullListWorker* worker, + THolder<IStream<Message*>> stream +) { + with_lock(worker->GetScopedAlloc()) { + worker->SetInput( + worker->GetGraph().GetHolderFactory().Create<TProtoListValue>(inputSpec, std::move(stream), worker), 0); + } +} + +ConsumerType TInputSpecTraits<TProtobufRawInputSpec>::MakeConsumer( + const 
TProtobufRawInputSpec& inputSpec, + TWorkerHolder<IPushStreamWorker> worker +) { + return MakeHolder<TProtoConsumerImpl>(inputSpec, std::move(worker)); +} + +template <typename TOutputSpec> +using PullStreamReturnType = typename TOutputSpecTraits<TOutputSpec>::TPullStreamReturnType; +template <typename TOutputSpec> +using PullListReturnType = typename TOutputSpecTraits<TOutputSpec>::TPullListReturnType; + +PullStreamReturnType<TProtobufRawOutputSpec> TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullStreamWorkerToOutputType( + const TProtobufRawOutputSpec& outputSpec, + TWorkerHolder<IPullStreamWorker> worker +) { + return MakeHolder<TRawProtoStreamImpl<TProtobufRawOutputSpec>>(outputSpec, std::move(worker)); +} + +PullListReturnType<TProtobufRawOutputSpec> TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullListWorkerToOutputType( + const TProtobufRawOutputSpec& outputSpec, + TWorkerHolder<IPullListWorker> worker +) { + return MakeHolder<TRawProtoListImpl<TProtobufRawOutputSpec>>(outputSpec, std::move(worker)); +} + +void TOutputSpecTraits<TProtobufRawOutputSpec>::SetConsumerToWorker( + const TProtobufRawOutputSpec& outputSpec, + IPushStreamWorker* worker, + THolder<IConsumer<TOutputItemType>> consumer +) { + worker->SetConsumer(MakeHolder<TPushRelayImpl<TProtobufRawOutputSpec>>(outputSpec, worker, std::move(consumer))); +} + +PullStreamReturnType<TProtobufRawMultiOutputSpec> TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullStreamWorkerToOutputType( + const TProtobufRawMultiOutputSpec& outputSpec, + TWorkerHolder<IPullStreamWorker> worker +) { + return MakeHolder<TRawProtoStreamImpl<TProtobufRawMultiOutputSpec>>(outputSpec, std::move(worker)); +} + +PullListReturnType<TProtobufRawMultiOutputSpec> TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullListWorkerToOutputType( + const TProtobufRawMultiOutputSpec& outputSpec, + TWorkerHolder<IPullListWorker> worker +) { + return MakeHolder<TRawProtoListImpl<TProtobufRawMultiOutputSpec>>(outputSpec, 
std::move(worker)); +} + +void TOutputSpecTraits<TProtobufRawMultiOutputSpec>::SetConsumerToWorker( + const TProtobufRawMultiOutputSpec& outputSpec, + IPushStreamWorker* worker, + THolder<IConsumer<TOutputItemType>> consumer +) { + worker->SetConsumer(MakeHolder<TPushRelayImpl<TProtobufRawMultiOutputSpec>>(outputSpec, worker, std::move(consumer))); +} diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.h b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.h new file mode 100644 index 0000000000..2a8fd19648 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/spec.h @@ -0,0 +1,257 @@ +#pragma once + +#include <ydb/library/yql/public/purecalc/common/interface.h> +#include <ydb/library/yql/public/purecalc/helpers/protobuf/schema_from_proto.h> + +#include <google/protobuf/message.h> + +#include <util/generic/maybe.h> + +namespace NYql { + namespace NPureCalc { + /** + * Processing mode for working with raw protobuf message inputs. + * + * In this mode purecalc accept pointers to abstract protobuf messages and processes them using the reflection + * mechanism. All passed messages should have the same descriptor (the one you pass to the constructor + * of the input spec). + * + * All working modes are implemented. In pull stream and pull list modes a program would accept a single object + * stream of const protobuf messages. In push mode, a program will return a consumer of const protobuf messages. + * + * The program synopsis follows: + * + * @code + * ... TPullStreamProgram::Apply(IStream<google::protobuf::Message*>); + * ... 
TPullListProgram::Apply(IStream<google::protobuf::Message*>); + * TConsumer<google::protobuf::Message*> TPushStreamProgram::Apply(...); + * @endcode + */ + class TProtobufRawInputSpec: public TInputSpecBase { + private: + const google::protobuf::Descriptor& Descriptor_; + const TMaybe<TString> TimestampColumn_; + const TProtoSchemaOptions SchemaOptions_; + mutable TVector<NYT::TNode> SavedSchemas_; + + public: + /** + * Build input spec and associate the given message descriptor. + */ + explicit TProtobufRawInputSpec( + const google::protobuf::Descriptor& descriptor, + const TMaybe<TString>& timestampColumn = Nothing(), + const TProtoSchemaOptions& options = {} + ); + + public: + const TVector<NYT::TNode>& GetSchemas() const override; + + /** + * Get the descriptor associated with this spec. + */ + const google::protobuf::Descriptor& GetDescriptor() const; + + const TMaybe<TString>& GetTimestampColumn() const; + + /* + * Get options that customize input struct type building. + */ + const TProtoSchemaOptions& GetSchemaOptions() const; + }; + + /** + * Processing mode for working with raw protobuf message outputs. + * + * In this mode purecalc yields pointers to abstract protobuf messages. All generated messages share the same + * descriptor so they can be safely converted into an appropriate message type. + * + * Note that one should not expect that the returned pointer will be valid forever; in can (and will) become + * outdated once a new output is requested/pushed. + * + * All working modes are implemented. In pull stream and pull list modes a program will return an object + * stream of non-const protobuf messages. In push mode, it will accept a single consumer of non-const + * messages. + * + * The program synopsis follows: + * + * @code + * IStream<google::protobuf::Message*> TPullStreamProgram::Apply(...); + * IStream<google::protobuf::Message*> TPullListProgram::Apply(...); + * ... 
TPushStreamProgram::Apply(TConsumer<google::protobuf::Message*>); + * @endcode + */ + class TProtobufRawOutputSpec: public TOutputSpecBase { + private: + const google::protobuf::Descriptor& Descriptor_; + google::protobuf::MessageFactory* Factory_; + TProtoSchemaOptions SchemaOptions_; + google::protobuf::Arena* Arena_; + mutable TMaybe<NYT::TNode> SavedSchema_; + + public: + /** + * Build output spec and associate the given message descriptor and maybe the given message factory. + */ + explicit TProtobufRawOutputSpec( + const google::protobuf::Descriptor& descriptor, + google::protobuf::MessageFactory* = nullptr, + const TProtoSchemaOptions& options = {}, + google::protobuf::Arena* arena = nullptr + ); + + public: + const NYT::TNode& GetSchema() const override; + + /** + * Get the descriptor associated with this spec. + */ + const google::protobuf::Descriptor& GetDescriptor() const; + + /** + * Set a new message factory which will be used to generate messages. Pass a null pointer to use the + * default factory. + */ + void SetFactory(google::protobuf::MessageFactory*); + + /** + * Get the message factory which is currently associated with this spec. + */ + google::protobuf::MessageFactory* GetFactory() const; + + /** + * Set a new arena which will be used to generate messages. Pass a null pointer to create on the heap. + */ + void SetArena(google::protobuf::Arena*); + + /** + * Get the arena which is currently associated with this spec. + */ + google::protobuf::Arena* GetArena() const; + + /** + * Get options that customize output struct type building. + */ + const TProtoSchemaOptions& GetSchemaOptions() const; + }; + + /** + * Processing mode for working with raw protobuf messages and several outputs. + * + * The program synopsis follows: + * + * @code + * IStream<std::pair<ui32, google::protobuf::Message*>> TPullStreamProgram::Apply(...); + * IStream<std::pair<ui32, google::protobuf::Message*>> TPullListProgram::Apply(...); + * ... 
TPushStreamProgram::Apply(TConsumer<std::pair<ui32, google::protobuf::Message*>>); + * @endcode + */ + class TProtobufRawMultiOutputSpec: public TOutputSpecBase { + private: + TVector<const google::protobuf::Descriptor*> Descriptors_; + TVector<google::protobuf::MessageFactory*> Factories_; + const TProtoSchemaOptions SchemaOptions_; + TVector<google::protobuf::Arena*> Arenas_; + mutable NYT::TNode SavedSchema_; + + public: + TProtobufRawMultiOutputSpec( + TVector<const google::protobuf::Descriptor*>, + TMaybe<TVector<google::protobuf::MessageFactory*>> = {}, + const TProtoSchemaOptions& options = {}, + TMaybe<TVector<google::protobuf::Arena*>> arenas = {} + ); + + public: + const NYT::TNode& GetSchema() const override; + + /** + * Get the descriptor associated with given output. + */ + const google::protobuf::Descriptor& GetDescriptor(ui32) const; + + /** + * Set a new message factory for given output. It will be used to generate messages for this output. + */ + void SetFactory(ui32, google::protobuf::MessageFactory*); + + /** + * Get the message factory which is currently associated with given output. + */ + google::protobuf::MessageFactory* GetFactory(ui32) const; + + /** + * Set a new arena for given output. It will be used to generate messages for this output. + */ + void SetArena(ui32, google::protobuf::Arena*); + + /** + * Get the arena which is currently associated with given output. + */ + google::protobuf::Arena* GetArena(ui32) const; + + /** + * Get number of outputs for this spec. + */ + ui32 GetOutputsNumber() const; + + /** + * Get options that customize output struct type building. 
+ */ + const TProtoSchemaOptions& GetSchemaOptions() const; + }; + + template <> + struct TInputSpecTraits<TProtobufRawInputSpec> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = true; + static const constexpr bool SupportPullListMode = true; + static const constexpr bool SupportPushStreamMode = true; + + using TConsumerType = THolder<IConsumer<google::protobuf::Message*>>; + + static void PreparePullStreamWorker(const TProtobufRawInputSpec&, IPullStreamWorker*, THolder<IStream<google::protobuf::Message*>>); + static void PreparePullListWorker(const TProtobufRawInputSpec&, IPullListWorker*, THolder<IStream<google::protobuf::Message*>>); + static TConsumerType MakeConsumer(const TProtobufRawInputSpec&, TWorkerHolder<IPushStreamWorker>); + }; + + template <> + struct TOutputSpecTraits<TProtobufRawOutputSpec> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = true; + static const constexpr bool SupportPullListMode = true; + static const constexpr bool SupportPushStreamMode = true; + + using TOutputItemType = google::protobuf::Message*; + using TPullStreamReturnType = THolder<IStream<TOutputItemType>>; + using TPullListReturnType = THolder<IStream<TOutputItemType>>; + + static const constexpr TOutputItemType StreamSentinel = nullptr; + + static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufRawOutputSpec&, TWorkerHolder<IPullStreamWorker>); + static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufRawOutputSpec&, TWorkerHolder<IPullListWorker>); + static void SetConsumerToWorker(const TProtobufRawOutputSpec&, IPushStreamWorker*, THolder<IConsumer<TOutputItemType>>); + }; + + template <> + struct TOutputSpecTraits<TProtobufRawMultiOutputSpec> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = true; + static const constexpr bool SupportPullListMode = true; + 
static const constexpr bool SupportPushStreamMode = true; + + using TOutputItemType = std::pair<ui32, google::protobuf::Message*>; + using TPullStreamReturnType = THolder<IStream<TOutputItemType>>; + using TPullListReturnType = THolder<IStream<TOutputItemType>>; + + static const constexpr TOutputItemType StreamSentinel = {0, nullptr}; + + static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufRawMultiOutputSpec&, TWorkerHolder<IPullStreamWorker>); + static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufRawMultiOutputSpec&, TWorkerHolder<IPullListWorker>); + static void SetConsumerToWorker(const TProtobufRawMultiOutputSpec&, IPushStreamWorker*, THolder<IConsumer<TOutputItemType>>); + }; + } +} diff --git a/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/ya.make b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/ya.make new file mode 100644 index 0000000000..ad72bbf43a --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/protobuf_raw/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +PEERDIR( + ydb/library/yql/public/purecalc/common + ydb/library/yql/public/purecalc/helpers/protobuf +) + +SRCS( + proto_holder.cpp + spec.cpp + spec.h +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/library/yql/public/purecalc/io_specs/ut/ya.make b/ydb/library/yql/public/purecalc/io_specs/ut/ya.make new file mode 100644 index 0000000000..b0179f3af0 --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/ut/ya.make @@ -0,0 +1,4 @@ +RECURSE( + ../mkql/ut + ../protobuf/ut +) diff --git a/ydb/library/yql/public/purecalc/io_specs/ya.make b/ydb/library/yql/public/purecalc/io_specs/ya.make new file mode 100644 index 0000000000..c30a69d40b --- /dev/null +++ b/ydb/library/yql/public/purecalc/io_specs/ya.make @@ -0,0 +1,9 @@ +RECURSE( + mkql + protobuf + protobuf_raw +) + +RECURSE_FOR_TESTS( + ut +) diff --git a/ydb/library/yql/public/purecalc/purecalc.cpp b/ydb/library/yql/public/purecalc/purecalc.cpp new file mode 100644 index 
0000000000..80cfd39d96 --- /dev/null +++ b/ydb/library/yql/public/purecalc/purecalc.cpp @@ -0,0 +1 @@ +#include "purecalc.h" diff --git a/ydb/library/yql/public/purecalc/purecalc.h b/ydb/library/yql/public/purecalc/purecalc.h new file mode 100644 index 0000000000..83bd8a7b84 --- /dev/null +++ b/ydb/library/yql/public/purecalc/purecalc.h @@ -0,0 +1,3 @@ +#pragma once + +#include "common/interface.h" diff --git a/ydb/library/yql/public/purecalc/ut/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/ut/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..4a4c7b68a2 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,78 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(lib) +add_subdirectory(protos) + +add_executable(ydb-library-yql-public-purecalc-ut) +target_compile_options(ydb-library-yql-public-purecalc-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(ydb-library-yql-public-purecalc-ut PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + yql-public-purecalc + purecalc-io_specs-protobuf + purecalc-ut-protos +) +target_link_options(ydb-library-yql-public-purecalc-ut PRIVATE + -Wl,-platform_version,macos,11.0,11.0 + -fPIC + -fPIC + -framework + CoreFoundation +) +target_sources(ydb-library-yql-public-purecalc-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/fake_spec.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_schema.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sql.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_udf.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_user_data.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_eval.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_pool.cpp +) +set_property( + TARGET + ydb-library-yql-public-purecalc-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + ydb-library-yql-public-purecalc-ut + TEST_TARGET + ydb-library-yql-public-purecalc-ut + TEST_ARG + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-ut + PROPERTY + LABELS + MEDIUM +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-ut + PROPERTY + PROCESSORS + 1 +) +target_allocator(ydb-library-yql-public-purecalc-ut + system_allocator +) +vcs_info(ydb-library-yql-public-purecalc-ut) diff --git a/ydb/library/yql/public/purecalc/ut/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/ut/CMakeLists.linux-aarch64.txt 
new file mode 100644 index 0000000000..01d22ca88f --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/CMakeLists.linux-aarch64.txt @@ -0,0 +1,81 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(lib) +add_subdirectory(protos) + +add_executable(ydb-library-yql-public-purecalc-ut) +target_compile_options(ydb-library-yql-public-purecalc-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(ydb-library-yql-public-purecalc-ut PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-testing-unittest_main + yql-public-purecalc + purecalc-io_specs-protobuf + purecalc-ut-protos +) +target_link_options(ydb-library-yql-public-purecalc-ut PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(ydb-library-yql-public-purecalc-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/fake_spec.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_schema.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sql.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_udf.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_user_data.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_eval.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_pool.cpp +) +set_property( + TARGET + ydb-library-yql-public-purecalc-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + ydb-library-yql-public-purecalc-ut + TEST_TARGET + ydb-library-yql-public-purecalc-ut + TEST_ARG + 
--print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-ut + PROPERTY + LABELS + MEDIUM +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-ut + PROPERTY + PROCESSORS + 1 +) +target_allocator(ydb-library-yql-public-purecalc-ut + cpp-malloc-jemalloc +) +vcs_info(ydb-library-yql-public-purecalc-ut) diff --git a/ydb/library/yql/public/purecalc/ut/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/ut/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..5c64f772cb --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/CMakeLists.linux-x86_64.txt @@ -0,0 +1,83 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(lib) +add_subdirectory(protos) + +add_executable(ydb-library-yql-public-purecalc-ut) +target_compile_options(ydb-library-yql-public-purecalc-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(ydb-library-yql-public-purecalc-ut PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + yql-public-purecalc + purecalc-io_specs-protobuf + purecalc-ut-protos +) +target_link_options(ydb-library-yql-public-purecalc-ut PRIVATE + -ldl + -lrt + -Wl,--no-as-needed + -fPIC + -fPIC + -lpthread + -lrt + -ldl +) +target_sources(ydb-library-yql-public-purecalc-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/fake_spec.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_schema.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sql.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_udf.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_user_data.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_eval.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_pool.cpp +) +set_property( + TARGET + ydb-library-yql-public-purecalc-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + ydb-library-yql-public-purecalc-ut + TEST_TARGET + ydb-library-yql-public-purecalc-ut + TEST_ARG + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-ut + PROPERTY + LABELS + MEDIUM +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-ut + PROPERTY + PROCESSORS + 1 +) +target_allocator(ydb-library-yql-public-purecalc-ut + cpp-malloc-tcmalloc + libs-tcmalloc-no_percpu_cache +) +vcs_info(ydb-library-yql-public-purecalc-ut) diff --git a/ydb/library/yql/public/purecalc/ut/CMakeLists.txt 
b/ydb/library/yql/public/purecalc/ut/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/ut/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/ut/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..eb1ce4c4c3 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/CMakeLists.windows-x86_64.txt @@ -0,0 +1,71 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(lib) +add_subdirectory(protos) + +add_executable(ydb-library-yql-public-purecalc-ut) +target_compile_options(ydb-library-yql-public-purecalc-ut PRIVATE + -DUSE_CURRENT_UDF_ABI_VERSION +) +target_link_libraries(ydb-library-yql-public-purecalc-ut PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-cpuid_check + cpp-testing-unittest_main + yql-public-purecalc + purecalc-io_specs-protobuf + purecalc-ut-protos +) +target_sources(ydb-library-yql-public-purecalc-ut PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/fake_spec.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_schema.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_sql.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_udf.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_user_data.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_eval.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/test_pool.cpp +) +set_property( + TARGET + ydb-library-yql-public-purecalc-ut + PROPERTY + SPLIT_FACTOR + 1 +) +add_yunittest( + NAME + ydb-library-yql-public-purecalc-ut + TEST_TARGET + ydb-library-yql-public-purecalc-ut + TEST_ARG + --print-before-suite + --print-before-test + --fork-tests + --print-times + --show-fails +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-ut + PROPERTY + LABELS + MEDIUM +) +set_yunittest_property( + TEST + ydb-library-yql-public-purecalc-ut + PROPERTY + PROCESSORS + 1 +) +target_allocator(ydb-library-yql-public-purecalc-ut + system_allocator +) +vcs_info(ydb-library-yql-public-purecalc-ut) diff --git a/ydb/library/yql/public/purecalc/ut/empty_stream.h b/ydb/library/yql/public/purecalc/ut/empty_stream.h new file mode 100644 index 0000000000..246aabd423 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/empty_stream.h @@ -0,0 +1,20 @@ +#pragma once + +#include 
<ydb/library/yql/public/purecalc/purecalc.h> + +namespace NYql { + namespace NPureCalc { + template <typename T> + class TEmptyStreamImpl: public IStream<T> { + public: + T Fetch() override { + return nullptr; + } + }; + + template <typename T> + THolder<IStream<T>> EmptyStream() { + return MakeHolder<TEmptyStreamImpl<T>>(); + } + } +} diff --git a/ydb/library/yql/public/purecalc/ut/fake_spec.cpp b/ydb/library/yql/public/purecalc/ut/fake_spec.cpp new file mode 100644 index 0000000000..4e45e76bc1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/fake_spec.cpp @@ -0,0 +1,36 @@ +#include "fake_spec.h" + +namespace NYql { + namespace NPureCalc { + NYT::TNode MakeFakeSchema() { + auto itemType = NYT::TNode::CreateList(); + itemType.Add("DataType"); + itemType.Add("Int32"); + + auto itemNode = NYT::TNode::CreateList(); + itemNode.Add("Name"); + itemNode.Add(std::move(itemType)); + + auto items = NYT::TNode::CreateList(); + items.Add(std::move(itemNode)); + + auto schema = NYT::TNode::CreateList(); + schema.Add("StructType"); + schema.Add(std::move(items)); + + return schema; + } + + TFakeInputSpec FakeIS(ui32 inputsNumber) { + auto spec = TFakeInputSpec(); + spec.Schemas = TVector<NYT::TNode>(inputsNumber, MakeFakeSchema()); + return spec; + } + + TFakeOutputSpec FakeOS() { + auto spec = TFakeOutputSpec(); + spec.Schema = MakeFakeSchema(); + return spec; + } + } +} diff --git a/ydb/library/yql/public/purecalc/ut/fake_spec.h b/ydb/library/yql/public/purecalc/ut/fake_spec.h new file mode 100644 index 0000000000..0b0e9e02ec --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/fake_spec.h @@ -0,0 +1,54 @@ +#pragma once + +#include <ydb/library/yql/public/purecalc/purecalc.h> + +namespace NYql { + namespace NPureCalc { + class TFakeInputSpec: public TInputSpecBase { + public: + TVector<NYT::TNode> Schemas = {NYT::TNode::CreateList()}; + + public: + const TVector<NYT::TNode>& GetSchemas() const override { + return Schemas; + } + }; + + class TFakeOutputSpec: public 
TOutputSpecBase { + public: + NYT::TNode Schema = NYT::TNode::CreateList(); + + public: + const NYT::TNode& GetSchema() const override { + return Schema; + } + }; + + template <> + struct TInputSpecTraits<TFakeInputSpec> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = false; + static const constexpr bool SupportPullListMode = false; + static const constexpr bool SupportPushStreamMode = false; + + using TConsumerType = void; + }; + + template <> + struct TOutputSpecTraits<TFakeOutputSpec> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = false; + static const constexpr bool SupportPullListMode = false; + static const constexpr bool SupportPushStreamMode = false; + + using TPullStreamReturnType = void; + using TPullListReturnType = void; + }; + + NYT::TNode MakeFakeSchema(); + TFakeInputSpec FakeIS(ui32 inputsNumber = 1); + TFakeOutputSpec FakeOS(); + } +} diff --git a/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..36bfa6b4d5 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(purecalc-ut-lib) +target_link_libraries(purecalc-ut-lib PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-yson + cpp-yson-node +) +target_sources(purecalc-ut-lib PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp +) diff --git a/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..b32b5970c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.linux-aarch64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(purecalc-ut-lib) +target_link_libraries(purecalc-ut-lib PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-yson + cpp-yson-node +) +target_sources(purecalc-ut-lib PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp +) diff --git a/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..b32b5970c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.linux-x86_64.txt @@ -0,0 +1,20 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(purecalc-ut-lib) +target_link_libraries(purecalc-ut-lib PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + library-cpp-yson + cpp-yson-node +) +target_sources(purecalc-ut-lib PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp +) diff --git a/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.txt b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..36bfa6b4d5 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/lib/CMakeLists.windows-x86_64.txt @@ -0,0 +1,19 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(purecalc-ut-lib) +target_link_libraries(purecalc-ut-lib PUBLIC + contrib-libs-cxxsupp + yutil + library-cpp-yson + cpp-yson-node +) +target_sources(purecalc-ut-lib PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp +) diff --git a/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp b/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp new file mode 100644 index 0000000000..cef9a99523 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/lib/helpers.cpp @@ -0,0 +1,55 @@ +#include "helpers.h" + +#include <library/cpp/yson/writer.h> + +#include <library/cpp/yson/node/node_visitor.h> + +#include <util/string/ascii.h> +#include <util/generic/hash_set.h> + + +namespace NYql { + namespace NPureCalc { + namespace NPrivate { + NYT::TNode GetSchema( + const TVector<TString>& fields, + const TVector<TString>& optionalFields + ) { + THashSet<TString> optionalFilter { + optionalFields.begin(), optionalFields.end() + }; + + NYT::TNode members {NYT::TNode::CreateList()}; + + auto addField = [&] (const TString& name, const TString& type) { + auto typeNode = NYT::TNode::CreateList() + .Add("DataType") + .Add(type); + + if (optionalFilter.contains(name)) { + typeNode = NYT::TNode::CreateList() + .Add("OptionalType") + .Add(typeNode); + } + + members.Add(NYT::TNode::CreateList() + .Add(name) + .Add(typeNode) + ); + }; + + for (const auto& field: fields) { + TString type {field}; + type[0] = AsciiToUpper(type[0]); + addField(field, type); + } + + NYT::TNode schema = NYT::TNode::CreateList() + .Add("StructType") + .Add(members); + + return schema; + } + } + } +} diff --git a/ydb/library/yql/public/purecalc/ut/lib/helpers.h 
b/ydb/library/yql/public/purecalc/ut/lib/helpers.h new file mode 100644 index 0000000000..53a22661ec --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/lib/helpers.h @@ -0,0 +1,18 @@ +#pragma once + +#include <library/cpp/yson/node/node.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/stream/str.h> + + +namespace NYql { + namespace NPureCalc { + namespace NPrivate { + NYT::TNode GetSchema( + const TVector<TString>& fields, + const TVector<TString>& optionalFields = {} + ); + } + } +} diff --git a/ydb/library/yql/public/purecalc/ut/lib/ya.make b/ydb/library/yql/public/purecalc/ut/lib/ya.make new file mode 100644 index 0000000000..df3ba8eab2 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/lib/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +PEERDIR( + library/cpp/yson + library/cpp/yson/node +) + +SRCS( + helpers.cpp + helpers.h +) + +END() diff --git a/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..12f10544f8 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,43 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(purecalc-ut-protos) +target_link_libraries(purecalc-ut-protos PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(purecalc-ut-protos PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto +) +target_proto_addincls(purecalc-ut-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(purecalc-ut-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.linux-aarch64.txt b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..806ee80165 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.linux-aarch64.txt @@ -0,0 +1,44 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(purecalc-ut-protos) +target_link_libraries(purecalc-ut-protos PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(purecalc-ut-protos PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto +) +target_proto_addincls(purecalc-ut-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(purecalc-ut-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.linux-x86_64.txt b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..806ee80165 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.linux-x86_64.txt @@ -0,0 +1,44 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(purecalc-ut-protos) +target_link_libraries(purecalc-ut-protos PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(purecalc-ut-protos PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto +) +target_proto_addincls(purecalc-ut-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(purecalc-ut-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.txt b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.windows-x86_64.txt b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..12f10544f8 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/protos/CMakeLists.windows-x86_64.txt @@ -0,0 +1,43 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(purecalc-ut-protos) +target_link_libraries(purecalc-ut-protos PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(purecalc-ut-protos PRIVATE + ${CMAKE_SOURCE_DIR}/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto +) +target_proto_addincls(purecalc-ut-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(purecalc-ut-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto b/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto new file mode 100644 index 0000000000..66593005a5 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/protos/test_structs.proto @@ -0,0 +1,122 @@ +package NPureCalcProto; + +message TUnparsed { + required string S = 1; +} + +message TParsed { + required int32 A = 1; + optional int32 B = 2; + required int32 C = 3; +} + +message TPartial { + required int32 X = 1; +} + +message TSimpleMessage { + required int32 X = 1; +} + +message TNamedSimpleMessage { + required int32 X = 1; + required bytes Name = 2; +} + +message TStringMessage { + required string X = 1; +} + +message TAllTypes { + required double FDouble = 1; + required float FFloat = 2; + required int64 FInt64 = 3; + required sfixed64 FSfixed64 = 4; + required sint64 FSint64 = 5; + required uint64 FUint64 = 6; + required fixed64 FFixed64 = 7; + required int32 FInt32 = 8; + required sfixed32 FSfixed32 = 9; + required sint32 FSint32 = 10; + required uint32 FUint32 = 11; + required fixed32 FFixed32 = 12; + 
required bool FBool = 13; + required string FString = 14; + required bytes FBytes = 15; +} + +message TOptionalAllTypes { + optional double FDouble = 1; + optional float FFloat = 2; + optional int64 FInt64 = 3; + optional sfixed64 FSfixed64 = 4; + optional sint64 FSint64 = 5; + optional uint64 FUint64 = 6; + optional fixed64 FFixed64 = 7; + optional int32 FInt32 = 8; + optional sfixed32 FSfixed32 = 9; + optional sint32 FSint32 = 10; + optional uint32 FUint32 = 11; + optional fixed32 FFixed32 = 12; + optional bool FBool = 13; + optional string FString = 14; + optional bytes FBytes = 15; +} + +message TSimpleNested { + required int32 X = 1; + required TAllTypes Y = 2; +} + +message TOptionalNested { + optional TAllTypes X = 1; +} + +message TSimpleRepeated { + required int32 X = 1; + repeated int32 Y = 2; +} + +message TNestedRepeated { + required int32 X = 1; + repeated TSimpleNested Y = 2; +} + +message TRecursive { + required int32 X = 1; + required TRecursive Nested = 2; +} + +message TRecursiveIndirectly { + message TNested { + required TRecursiveIndirectly Nested = 1; + } + + required int32 X = 1; + repeated TNested Nested = 2; +} + +message TMessageWithEnum { + enum ETestEnum { + VALUE1 = 0; + VALUE2 = 1; + } + repeated ETestEnum EnumValue = 1; +} + +message TUnsplitted { + required int32 AInt = 1; + required uint32 AUint = 2; + required string AString = 3; + optional bool ABool = 4; +} + +message TSplitted1 { + required int32 BInt = 1; + required string BString = 2; +} + +message TSplitted2 { + required uint32 CUint = 1; + required string CString = 2; +} diff --git a/ydb/library/yql/public/purecalc/ut/protos/ya.make b/ydb/library/yql/public/purecalc/ut/protos/ya.make new file mode 100644 index 0000000000..a455ff2fba --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/protos/ya.make @@ -0,0 +1,9 @@ +PROTO_LIBRARY() + +SRCS( + test_structs.proto +) + +EXCLUDE_TAGS(GO_PROTO) + +END() diff --git a/ydb/library/yql/public/purecalc/ut/test_eval.cpp 
b/ydb/library/yql/public/purecalc/ut/test_eval.cpp new file mode 100644 index 0000000000..a556b47b03 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/test_eval.cpp @@ -0,0 +1,30 @@ +#include <ydb/library/yql/public/purecalc/purecalc.h> +#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h> +#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h> +#include <ydb/library/yql/public/purecalc/ut/empty_stream.h> + +#include <library/cpp/testing/unittest/registar.h> + +Y_UNIT_TEST_SUITE(TestEval) { + Y_UNIT_TEST(TestEvalExpr) { + using namespace NYql::NPureCalc; + + auto options = TProgramFactoryOptions(); + auto factory = MakeProgramFactory(options); + + auto program = factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + "SELECT Unwrap(cast(EvaluateExpr('foo' || 'bar') as Utf8)) AS X", + ETranslationMode::SQL + ); + + auto stream = program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>()); + + NPureCalcProto::TStringMessage* message; + + UNIT_ASSERT(message = stream->Fetch()); + UNIT_ASSERT_EQUAL(message->GetX(), "foobar"); + UNIT_ASSERT(!stream->Fetch()); + } +} diff --git a/ydb/library/yql/public/purecalc/ut/test_pool.cpp b/ydb/library/yql/public/purecalc/ut/test_pool.cpp new file mode 100644 index 0000000000..8c80ae9c84 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/test_pool.cpp @@ -0,0 +1,184 @@ +#include <library/cpp/testing/unittest/registar.h> + +#include <ydb/library/yql/public/purecalc/common/interface.h> +#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h> +#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h> +#include <library/cpp/protobuf/util/pb_io.h> + +#include <util/string/cast.h> + +using namespace NYql::NPureCalc; + +namespace { + class TStringMessageStreamImpl: public IStream<NPureCalcProto::TStringMessage*> { + private: + ui32 I_ = 0; + NPureCalcProto::TStringMessage Message_{}; 
+ + public: + NPureCalcProto::TStringMessage* Fetch() override { + if (I_ >= 3) { + return nullptr; + } else { + Message_.SetX(ToString(I_)); + ++I_; + return &Message_; + } + } + }; + + class TStringMessageConsumerImpl: public IConsumer<NPureCalcProto::TStringMessage*> { + private: + TVector<TString>* Buf_; + + public: + TStringMessageConsumerImpl(TVector<TString>* buf) + : Buf_(buf) + { + } + + public: + void OnObject(NPureCalcProto::TStringMessage* t) override { + Buf_->push_back(t->GetX()); + } + + void OnFinish() override { + } + }; + +} + +Y_UNIT_TEST_SUITE(TestWorkerPool) { + static TString sql = "SELECT 'abc'u || X AS X FROM Input"; + + static TVector<TString> expected{"abc0", "abc1", "abc2"}; + + void TestPullStreamImpl(bool useWorkerPool) { + auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseWorkerPool(useWorkerPool)); + + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + sql, + ETranslationMode::SQL + ); + + auto check = [](IStream<NPureCalcProto::TStringMessage*>* output) { + TVector<TString> actual; + while (auto *x = output->Fetch()) { + actual.push_back(x->GetX()); + } + + UNIT_ASSERT_VALUES_EQUAL(expected, actual); + }; + + // Sequential use + for (size_t i = 0; i < 2; ++i) { + auto output = program->Apply(MakeHolder<TStringMessageStreamImpl>()); + check(output.Get()); + } + // Parallel use + { + auto output1 = program->Apply(MakeHolder<TStringMessageStreamImpl>()); + auto output2 = program->Apply(MakeHolder<TStringMessageStreamImpl>()); + check(output1.Get()); + check(output2.Get()); + } + } + + Y_UNIT_TEST(TestPullStreamUseWorkerPool) { + TestPullStreamImpl(true); + } + + Y_UNIT_TEST(TestPullStreamNoWorkerPool) { + TestPullStreamImpl(false); + } + + void TestPullListImpl(bool useWorkerPool) { + auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseWorkerPool(useWorkerPool)); + + auto program = 
factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + sql, + ETranslationMode::SQL + ); + + auto check = [](IStream<NPureCalcProto::TStringMessage*>* output) { + TVector<TString> actual; + while (auto *x = output->Fetch()) { + actual.push_back(x->GetX()); + } + + UNIT_ASSERT_VALUES_EQUAL(expected, actual); + }; + + // Sequential use + for (size_t i = 0; i < 2; ++i) { + auto output = program->Apply(MakeHolder<TStringMessageStreamImpl>()); + check(output.Get()); + } + // Parallel use + { + auto output1 = program->Apply(MakeHolder<TStringMessageStreamImpl>()); + auto output2 = program->Apply(MakeHolder<TStringMessageStreamImpl>()); + check(output1.Get()); + check(output2.Get()); + } + } + + Y_UNIT_TEST(TestPullListUseWorkerPool) { + TestPullListImpl(true); + } + + Y_UNIT_TEST(TestPullListNoWorkerPool) { + TestPullListImpl(false); + } + + void TestPushStreamImpl(bool useWorkerPool) { + auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseWorkerPool(useWorkerPool)); + + auto program = factory->MakePushStreamProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + sql, + ETranslationMode::SQL + ); + + auto check = [](IConsumer<NPureCalcProto::TStringMessage*>* input, const TVector<TString>& result) { + NPureCalcProto::TStringMessage message; + for (auto s: {"0", "1", "2"}) { + message.SetX(s); + input->OnObject(&message); + } + input->OnFinish(); + + UNIT_ASSERT_VALUES_EQUAL(expected, result); + }; + + // Sequential use + for (size_t i = 0; i < 2; ++i) { + TVector<TString> actual; + auto input = program->Apply(MakeHolder<TStringMessageConsumerImpl>(&actual)); + check(input.Get(), actual); + } + + // Parallel use + { + TVector<TString> actual1; + auto input1 = program->Apply(MakeHolder<TStringMessageConsumerImpl>(&actual1)); + TVector<TString> actual2; + auto input2 = 
program->Apply(MakeHolder<TStringMessageConsumerImpl>(&actual2)); + check(input1.Get(), actual1); + check(input2.Get(), actual2); + } + } + + Y_UNIT_TEST(TestPushStreamUseWorkerPool) { + TestPushStreamImpl(true); + } + + Y_UNIT_TEST(TestPushStreamNoWorkerPool) { + TestPushStreamImpl(false); + } +} diff --git a/ydb/library/yql/public/purecalc/ut/test_schema.cpp b/ydb/library/yql/public/purecalc/ut/test_schema.cpp new file mode 100644 index 0000000000..9763e52b00 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/test_schema.cpp @@ -0,0 +1 @@ +#include <library/cpp/testing/unittest/registar.h> diff --git a/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp b/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp new file mode 100644 index 0000000000..b9d55c0f98 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/test_sexpr.cpp @@ -0,0 +1,55 @@ +#include <ydb/library/yql/public/purecalc/purecalc.h> + +#include "fake_spec.h" + +#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h> + +#include <library/cpp/testing/unittest/registar.h> + +Y_UNIT_TEST_SUITE(TestSExpr) { + Y_UNIT_TEST(TestSExprCompile) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + auto expr = TString(R"( + ( + (return (Self '0)) + ) + )"); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePullStreamProgram(FakeIS(), FakeOS(), expr, ETranslationMode::SExpr); + }()); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePullListProgram(FakeIS(), FakeOS(), expr, ETranslationMode::SExpr); + }()); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePushStreamProgram(FakeIS(), FakeOS(), expr, ETranslationMode::SExpr); + }()); + } + + Y_UNIT_TEST(TestInvalidSExpr) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + auto sql = TString(R"( + Some totally invalid SExpr + )"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SExpr); + }(), TCompileError, "failed to 
parse s-expression"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SExpr); + }(), TCompileError, "failed to parse s-expression"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SExpr); + }(), TCompileError, "failed to parse s-expression"); + } +} diff --git a/ydb/library/yql/public/purecalc/ut/test_sql.cpp b/ydb/library/yql/public/purecalc/ut/test_sql.cpp new file mode 100644 index 0000000000..10157912a9 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/test_sql.cpp @@ -0,0 +1,205 @@ +#include <ydb/library/yql/public/purecalc/purecalc.h> + +#include "fake_spec.h" + +#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h> + +#include <library/cpp/testing/unittest/registar.h> + +Y_UNIT_TEST_SUITE(TestSql) { + using namespace NYql::NPureCalc; + + Y_UNIT_TEST(TestSqlCompile) { + auto factory = MakeProgramFactory(); + + auto sql = TString(R"( + SELECT * FROM Input; + )"); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); + + auto program = factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + auto expectedIssues = TString(R"(<main>: Warning: Type annotation, code: 1030 + generated.sql:2:13: Warning: At function: PersistableRepr + generated.sql:2:13: Warning: Persistable required. 
Atom, key, world, datasink, datasource, callable, resource, stream and lambda are not persistable, code: 1104 +)"); + + UNIT_ASSERT_VALUES_EQUAL(expectedIssues, program->GetIssues().ToString()); + } + + Y_UNIT_TEST(TestSqlCompileSingleUnnamedInput) { + auto factory = MakeProgramFactory(); + + auto sql = TString(R"( + SELECT * FROM TABLES() + )"); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); + } + + Y_UNIT_TEST(TestSqlCompileNamedMultiinputs) { + auto factory = MakeProgramFactory(); + + auto sql = TString(R"( + SELECT * FROM Input0 + UNION ALL + SELECT * FROM Input1 + )"); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePullListProgram(FakeIS(2), FakeOS(), sql, ETranslationMode::SQL); + }()); + } + + Y_UNIT_TEST(TestSqlCompileUnnamedMultiinputs) { + auto factory = MakeProgramFactory(); + + auto sql = TString(R"( + $t0, $t1, $t2 = PROCESS TABLES(); + SELECT * FROM $t0 + UNION ALL + SELECT * FROM $t1 + UNION ALL + SELECT * FROM $t2 + )"); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePullListProgram(FakeIS(3), FakeOS(), sql, ETranslationMode::SQL); + }()); + } + + Y_UNIT_TEST(TestSqlCompileWithWarning) { + auto factory = MakeProgramFactory(); + + auto sql = TString(R"( + $x = 1; + $y = 2; + SELECT $x as Name FROM Input; + )"); + + auto expectedIssues = TString(R"(generated.sql:3:13: Warning: Symbol $y is not used, code: 4527 +<main>: Warning: Type annotation, code: 1030 + generated.sql:4:13: Warning: At function: PersistableRepr + generated.sql:4:13: Warning: Persistable required. 
Atom, key, world, datasink, datasource, callable, resource, stream and lambda are not persistable, code: 1104 +)"); + + auto program = factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + UNIT_ASSERT_VALUES_EQUAL(expectedIssues, program->GetIssues().ToString()); + } + + Y_UNIT_TEST(TestSqlWrongTableName) { + auto factory = MakeProgramFactory(); + + auto sql = TString(R"( + SELECT * FROM WrongTable; + )"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }(), TCompileError, "Failed to optimize"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }(), TCompileError, "Failed to optimize"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }(), TCompileError, "Failed to optimize"); + } + + Y_UNIT_TEST(TestAllocateLargeStringOnEvaluate) { + auto factory = MakeProgramFactory(); + + auto sql = TString(R"( + $data = Length(EvaluateExpr("long string" || " very loooong string")); + SELECT $data as Name FROM Input; + )"); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); + } + + Y_UNIT_TEST(TestInvalidSql) { + auto factory = MakeProgramFactory(); + + auto sql = TString(R"( + Just some invalid SQL; + )"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }(), TCompileError, "failed to parse SQL"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }(), 
TCompileError, "failed to parse SQL"); + + UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ + factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }(), TCompileError, "failed to parse SQL"); + } + + Y_UNIT_TEST(TestUseProcess) { + auto factory = MakeProgramFactory(); + + auto sql = TString(R"( + $processor = ($row) -> ($row); + + PROCESS Input using $processor(TableRow()); + )"); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); + } + + Y_UNIT_TEST(TestUseCodegen) { + auto factory = MakeProgramFactory(); + + auto sql = TString(R"( + $processor = ($row) -> { + $lambda = EvaluateCode(LambdaCode(($row) -> ($row))); + return $lambda($row); + }; + + PROCESS Input using $processor(TableRow()); + )"); + + UNIT_ASSERT_NO_EXCEPTION([&](){ + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); + } +} diff --git a/ydb/library/yql/public/purecalc/ut/test_udf.cpp b/ydb/library/yql/public/purecalc/ut/test_udf.cpp new file mode 100644 index 0000000000..a42326d521 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/test_udf.cpp @@ -0,0 +1,195 @@ +#include <library/cpp/testing/unittest/registar.h> + +#include <ydb/library/yql/public/purecalc/purecalc.h> +#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h> +#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h> +#include <ydb/library/yql/public/udf/udf_counter.h> +#include <ydb/library/yql/public/udf/udf_type_builder.h> +#include <library/cpp/testing/unittest/registar.h> + +class TMyModule : public NKikimr::NUdf::IUdfModule { +public: + class TFunc : public NKikimr::NUdf::TBoxedValue { + public: + TFunc(NKikimr::NUdf::TCounter 
counter, NKikimr::NUdf::TScopedProbe scopedProbe) + : Counter_(counter) + , ScopedProbe_(scopedProbe) + {} + + NKikimr::NUdf::TUnboxedValue Run(const NKikimr::NUdf::IValueBuilder* valueBuilder, const NKikimr::NUdf::TUnboxedValuePod* args) const override { + Y_UNUSED(valueBuilder); + with_lock(ScopedProbe_) { + Counter_.Inc(); + return NKikimr::NUdf::TUnboxedValuePod(args[0].Get<i32>()); + } + } + + private: + mutable NKikimr::NUdf::TCounter Counter_; + mutable NKikimr::NUdf::TScopedProbe ScopedProbe_; + }; + + void GetAllFunctions(NKikimr::NUdf::IFunctionsSink& sink) const override { + Y_UNUSED(sink); + } + + void BuildFunctionTypeInfo( + const NKikimr::NUdf::TStringRef& name, + NKikimr::NUdf::TType* userType, + const NKikimr::NUdf::TStringRef& typeConfig, + ui32 flags, + NKikimr::NUdf::IFunctionTypeInfoBuilder& builder) const override { + Y_UNUSED(userType); + Y_UNUSED(typeConfig); + Y_UNUSED(flags); + if (name == NKikimr::NUdf::TStringRef::Of("Func")) { + builder.SimpleSignature<i32(i32)>(); + builder.Implementation(new TFunc( + builder.GetCounter("FuncCalls",true), + builder.GetScopedProbe("FuncTime") + )); + } + } + + void CleanupOnTerminate() const override { + } +}; + +class TMyCountersProvider : public NKikimr::NUdf::ICountersProvider, public NKikimr::NUdf::IScopedProbeHost { +public: + TMyCountersProvider(i64* calls, TString* log) + : Calls_(calls) + , Log_(log) + {} + + NKikimr::NUdf::TCounter GetCounter(const NKikimr::NUdf::TStringRef& module, const NKikimr::NUdf::TStringRef& name, bool deriv) override { + UNIT_ASSERT_VALUES_EQUAL(module, "MyModule"); + UNIT_ASSERT_VALUES_EQUAL(name, "FuncCalls"); + UNIT_ASSERT_VALUES_EQUAL(deriv, true); + return NKikimr::NUdf::TCounter(Calls_); + } + + NKikimr::NUdf::TScopedProbe GetScopedProbe(const NKikimr::NUdf::TStringRef& module, const NKikimr::NUdf::TStringRef& name) override { + UNIT_ASSERT_VALUES_EQUAL(module, "MyModule"); + UNIT_ASSERT_VALUES_EQUAL(name, "FuncTime"); + return NKikimr::NUdf::TScopedProbe(Log_ ? 
this : nullptr, Log_); + } + + void Acquire(void* cookie) override { + UNIT_ASSERT(cookie == Log_); + *Log_ += "Enter\n"; + } + + void Release(void* cookie) override { + UNIT_ASSERT(cookie == Log_); + *Log_ += "Exit\n"; + } + +private: + i64* Calls_; + TString* Log_; +}; + +namespace NPureCalcProto { + class TUnparsed; + class TParsed; +} + +class TDocInput : public NYql::NPureCalc::IStream<NPureCalcProto::TUnparsed*> { +public: + NPureCalcProto::TUnparsed* Fetch() override { + if (Extracted) { + return nullptr; + } + + Extracted = true; + Msg.SetS("foo"); + return &Msg; + } + +public: + NPureCalcProto::TUnparsed Msg; + bool Extracted = false; +}; + +Y_UNIT_TEST_SUITE(TestUdf) { + Y_UNIT_TEST(TestCounters) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + i64 callCounter = 0; + TMyCountersProvider myCountersProvider(&callCounter, nullptr); + factory->AddUdfModule("MyModule", new TMyModule); + factory->SetCountersProvider(&myCountersProvider); + + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TUnparsed>(), + TProtobufOutputSpec<NPureCalcProto::TParsed>(), + "select MyModule::Func(1) as A, 2 as B, 3 as C from Input", + ETranslationMode::SQL); + + auto out = program->Apply(MakeHolder<TDocInput>()); + auto* message = out->Fetch(); + UNIT_ASSERT(message); + UNIT_ASSERT_VALUES_EQUAL(message->GetA(), 1); + UNIT_ASSERT_VALUES_EQUAL(message->GetB(), 2); + UNIT_ASSERT_VALUES_EQUAL(message->GetC(), 3); + UNIT_ASSERT_VALUES_EQUAL(callCounter, 1); + UNIT_ASSERT(!out->Fetch()); + } + + Y_UNIT_TEST(TestCountersFilteredColumns) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + i64 callCounter = 0; + TMyCountersProvider myCountersProvider(&callCounter, nullptr); + factory->AddUdfModule("MyModule", new TMyModule); + factory->SetCountersProvider(&myCountersProvider); + + auto ospec = TProtobufOutputSpec<NPureCalcProto::TParsed>(); + 
ospec.SetOutputColumnsFilter(THashSet<TString>({"B", "C"})); + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TUnparsed>(), + ospec, + "select MyModule::Func(1) as A, 2 as B, 3 as C from Input", + ETranslationMode::SQL); + + auto out = program->Apply(MakeHolder<TDocInput>()); + auto* message = out->Fetch(); + UNIT_ASSERT(message); + UNIT_ASSERT_VALUES_EQUAL(message->GetA(), 0); + UNIT_ASSERT_VALUES_EQUAL(message->GetB(), 2); + UNIT_ASSERT_VALUES_EQUAL(message->GetC(), 3); + UNIT_ASSERT_VALUES_EQUAL(callCounter, 0); + UNIT_ASSERT(!out->Fetch()); + } + + Y_UNIT_TEST(TestScopedProbes) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + TString log; + TMyCountersProvider myCountersProvider(nullptr, &log); + factory->AddUdfModule("MyModule", new TMyModule); + factory->SetCountersProvider(&myCountersProvider); + + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TUnparsed>(), + TProtobufOutputSpec<NPureCalcProto::TParsed>(), + "select MyModule::Func(1) as A, 2 as B, 3 as C from Input", + ETranslationMode::SQL); + + auto out = program->Apply(MakeHolder<TDocInput>()); + auto* message = out->Fetch(); + UNIT_ASSERT(message); + UNIT_ASSERT_VALUES_EQUAL(message->GetA(), 1); + UNIT_ASSERT_VALUES_EQUAL(message->GetB(), 2); + UNIT_ASSERT_VALUES_EQUAL(message->GetC(), 3); + UNIT_ASSERT_VALUES_EQUAL(log, "Enter\nExit\n"); + UNIT_ASSERT(!out->Fetch()); + } +} diff --git a/ydb/library/yql/public/purecalc/ut/test_user_data.cpp b/ydb/library/yql/public/purecalc/ut/test_user_data.cpp new file mode 100644 index 0000000000..3d0a0935ef --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/test_user_data.cpp @@ -0,0 +1,62 @@ +#include <ydb/library/yql/public/purecalc/purecalc.h> +#include <ydb/library/yql/public/purecalc/io_specs/protobuf/spec.h> +#include <ydb/library/yql/public/purecalc/ut/protos/test_structs.pb.h> +#include <ydb/library/yql/public/purecalc/ut/empty_stream.h> + +#include 
<library/cpp/testing/unittest/registar.h> + +Y_UNIT_TEST_SUITE(TestUserData) { + Y_UNIT_TEST(TestUserData) { + using namespace NYql::NPureCalc; + + auto options = TProgramFactoryOptions() + .AddFile(NYql::NUserData::EDisposition::INLINE, "my_file.txt", "my content!"); + + auto factory = MakeProgramFactory(options); + + auto program = factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + "SELECT UNWRAP(CAST(FileContent(\"my_file.txt\") AS Utf8)) AS X", + ETranslationMode::SQL + ); + + auto stream = program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>()); + + NPureCalcProto::TStringMessage* message; + + UNIT_ASSERT(message = stream->Fetch()); + UNIT_ASSERT_EQUAL(message->GetX(), "my content!"); + UNIT_ASSERT(!stream->Fetch()); + } + + Y_UNIT_TEST(TestUserDataLibrary) { + using namespace NYql::NPureCalc; + + try { + auto options = TProgramFactoryOptions() + .AddLibrary(NYql::NUserData::EDisposition::INLINE, "a.sql", "$x = 1; EXPORT $x;") + .AddLibrary(NYql::NUserData::EDisposition::INLINE, "b.sql", "IMPORT a SYMBOLS $x; $y = CAST($x + 1 AS String); EXPORT $y;"); + + auto factory = MakeProgramFactory(options); + + auto program = factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + "IMPORT b SYMBOLS $y; SELECT CAST($y AS Utf8) ?? 
'' AS X;", + ETranslationMode::SQL + ); + + auto stream = program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>()); + + NPureCalcProto::TStringMessage* message; + + UNIT_ASSERT(message = stream->Fetch()); + UNIT_ASSERT_EQUAL(message->GetX(), "2"); + UNIT_ASSERT(!stream->Fetch()); + } catch (const TCompileError& e) { + Cerr << e; + throw e; + } + } +} diff --git a/ydb/library/yql/public/purecalc/ut/ya.make b/ydb/library/yql/public/purecalc/ut/ya.make new file mode 100644 index 0000000000..5b613a4669 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ut/ya.make @@ -0,0 +1,26 @@ +UNITTEST() + +SRCS( + empty_stream.h + fake_spec.cpp + fake_spec.h + test_schema.cpp + test_sexpr.cpp + test_sql.cpp + test_udf.cpp + test_user_data.cpp + test_eval.cpp + test_pool.cpp +) + +PEERDIR( + ydb/library/yql/public/purecalc + ydb/library/yql/public/purecalc/io_specs/protobuf + ydb/library/yql/public/purecalc/ut/protos +) + +SIZE(MEDIUM) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/library/yql/public/purecalc/ya.make b/ydb/library/yql/public/purecalc/ya.make new file mode 100644 index 0000000000..3ac6ab6b59 --- /dev/null +++ b/ydb/library/yql/public/purecalc/ya.make @@ -0,0 +1,19 @@ +LIBRARY() + +SRCS( + purecalc.cpp +) + +PEERDIR( + ydb/library/yql/public/udf/service/exception_policy + ydb/library/yql/public/purecalc/common +) + +YQL_LAST_ABI_VERSION() + +END() + +RECURSE_FOR_TESTS( + io_specs/ut + ut +) diff --git a/ydb/library/yql/public/ya.make b/ydb/library/yql/public/ya.make index 37dcc6368a..9fbf7e6a27 100644 --- a/ydb/library/yql/public/ya.make +++ b/ydb/library/yql/public/ya.make @@ -2,6 +2,10 @@ RECURSE( decimal fastcheck issue + purecalc + purecalc/examples + purecalc/helpers + purecalc/io_specs types udf udf/arrow diff --git a/yql/CMakeLists.txt b/yql/CMakeLists.txt new file mode 100644 index 0000000000..bd95d3af2b --- /dev/null +++ b/yql/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(udfs) diff --git a/yql/udfs/CMakeLists.txt b/yql/udfs/CMakeLists.txt new file mode 100644 index 0000000000..867161a12c --- /dev/null +++ b/yql/udfs/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(common) diff --git a/yql/udfs/common/CMakeLists.txt b/yql/udfs/common/CMakeLists.txt new file mode 100644 index 0000000000..46c961352b --- /dev/null +++ b/yql/udfs/common/CMakeLists.txt @@ -0,0 +1,10 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(ip) +add_subdirectory(url) diff --git a/yql/udfs/common/ip/CMakeLists.darwin-x86_64.txt b/yql/udfs/common/ip/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..8c6217f557 --- /dev/null +++ b/yql/udfs/common/ip/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,36 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(ip_udf INTERFACE) +target_link_libraries(ip_udf INTERFACE + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + library-cpp-ipreg + common-ip_base-lib +) + +add_global_library_for(ip_udf.global ip_udf) +target_compile_options(ip_udf.global PRIVATE + -DUDF_ABI_VERSION_MAJOR=2 + -DUDF_ABI_VERSION_MINOR=28 + -DUDF_ABI_VERSION_PATCH=0 +) +target_link_libraries(ip_udf.global PUBLIC + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + library-cpp-ipreg + common-ip_base-lib +) +target_sources(ip_udf.global PRIVATE + ${CMAKE_SOURCE_DIR}/yql/udfs/common/ip/ip_udf.cpp +) diff --git a/yql/udfs/common/ip/CMakeLists.linux-aarch64.txt b/yql/udfs/common/ip/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..9e54b0014d --- /dev/null +++ b/yql/udfs/common/ip/CMakeLists.linux-aarch64.txt @@ -0,0 +1,38 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(ip_udf INTERFACE) +target_link_libraries(ip_udf INTERFACE + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + library-cpp-ipreg + common-ip_base-lib +) + +add_global_library_for(ip_udf.global ip_udf) +target_compile_options(ip_udf.global PRIVATE + -DUDF_ABI_VERSION_MAJOR=2 + -DUDF_ABI_VERSION_MINOR=28 + -DUDF_ABI_VERSION_PATCH=0 +) +target_link_libraries(ip_udf.global PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + library-cpp-ipreg + common-ip_base-lib +) +target_sources(ip_udf.global PRIVATE + ${CMAKE_SOURCE_DIR}/yql/udfs/common/ip/ip_udf.cpp +) diff --git a/yql/udfs/common/ip/CMakeLists.linux-x86_64.txt b/yql/udfs/common/ip/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..9e54b0014d --- /dev/null +++ b/yql/udfs/common/ip/CMakeLists.linux-x86_64.txt @@ -0,0 +1,38 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(ip_udf INTERFACE) +target_link_libraries(ip_udf INTERFACE + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + library-cpp-ipreg + common-ip_base-lib +) + +add_global_library_for(ip_udf.global ip_udf) +target_compile_options(ip_udf.global PRIVATE + -DUDF_ABI_VERSION_MAJOR=2 + -DUDF_ABI_VERSION_MINOR=28 + -DUDF_ABI_VERSION_PATCH=0 +) +target_link_libraries(ip_udf.global PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + library-cpp-ipreg + common-ip_base-lib +) +target_sources(ip_udf.global PRIVATE + ${CMAKE_SOURCE_DIR}/yql/udfs/common/ip/ip_udf.cpp +) diff --git a/yql/udfs/common/ip/CMakeLists.txt b/yql/udfs/common/ip/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/yql/udfs/common/ip/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/yql/udfs/common/ip/CMakeLists.windows-x86_64.txt b/yql/udfs/common/ip/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..8c6217f557 --- /dev/null +++ b/yql/udfs/common/ip/CMakeLists.windows-x86_64.txt @@ -0,0 +1,36 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(ip_udf INTERFACE) +target_link_libraries(ip_udf INTERFACE + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + library-cpp-ipreg + common-ip_base-lib +) + +add_global_library_for(ip_udf.global ip_udf) +target_compile_options(ip_udf.global PRIVATE + -DUDF_ABI_VERSION_MAJOR=2 + -DUDF_ABI_VERSION_MINOR=28 + -DUDF_ABI_VERSION_PATCH=0 +) +target_link_libraries(ip_udf.global PUBLIC + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + library-cpp-ipreg + common-ip_base-lib +) +target_sources(ip_udf.global PRIVATE + ${CMAKE_SOURCE_DIR}/yql/udfs/common/ip/ip_udf.cpp +) diff --git a/yql/udfs/common/url/CMakeLists.darwin-x86_64.txt b/yql/udfs/common/url/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..5b6b766df7 --- /dev/null +++ b/yql/udfs/common/url/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,42 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(url_udf INTERFACE) +target_link_libraries(url_udf INTERFACE + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + kernel-hosts-owner + kernel-urlnorm + library-cpp-robots_txt + common-url_base-lib + yweb-robot-dbscheeme +) + +add_global_library_for(url_udf.global url_udf) +target_compile_options(url_udf.global PRIVATE + -DUDF_ABI_VERSION_MAJOR=2 + -DUDF_ABI_VERSION_MINOR=33 + -DUDF_ABI_VERSION_PATCH=0 +) +target_link_libraries(url_udf.global PUBLIC + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + kernel-hosts-owner + kernel-urlnorm + library-cpp-robots_txt + common-url_base-lib + yweb-robot-dbscheeme +) +target_sources(url_udf.global PRIVATE + ${CMAKE_SOURCE_DIR}/yql/udfs/common/url/url_udf.cpp +) diff --git a/yql/udfs/common/url/CMakeLists.linux-aarch64.txt b/yql/udfs/common/url/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..467982bd9d --- /dev/null +++ b/yql/udfs/common/url/CMakeLists.linux-aarch64.txt @@ -0,0 +1,44 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(url_udf INTERFACE) +target_link_libraries(url_udf INTERFACE + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + kernel-hosts-owner + kernel-urlnorm + library-cpp-robots_txt + common-url_base-lib + yweb-robot-dbscheeme +) + +add_global_library_for(url_udf.global url_udf) +target_compile_options(url_udf.global PRIVATE + -DUDF_ABI_VERSION_MAJOR=2 + -DUDF_ABI_VERSION_MINOR=33 + -DUDF_ABI_VERSION_PATCH=0 +) +target_link_libraries(url_udf.global PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + kernel-hosts-owner + kernel-urlnorm + library-cpp-robots_txt + common-url_base-lib + yweb-robot-dbscheeme +) +target_sources(url_udf.global PRIVATE + ${CMAKE_SOURCE_DIR}/yql/udfs/common/url/url_udf.cpp +) diff --git a/yql/udfs/common/url/CMakeLists.linux-x86_64.txt b/yql/udfs/common/url/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..467982bd9d --- /dev/null +++ b/yql/udfs/common/url/CMakeLists.linux-x86_64.txt @@ -0,0 +1,44 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(url_udf INTERFACE) +target_link_libraries(url_udf INTERFACE + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + kernel-hosts-owner + kernel-urlnorm + library-cpp-robots_txt + common-url_base-lib + yweb-robot-dbscheeme +) + +add_global_library_for(url_udf.global url_udf) +target_compile_options(url_udf.global PRIVATE + -DUDF_ABI_VERSION_MAJOR=2 + -DUDF_ABI_VERSION_MINOR=33 + -DUDF_ABI_VERSION_PATCH=0 +) +target_link_libraries(url_udf.global PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + kernel-hosts-owner + kernel-urlnorm + library-cpp-robots_txt + common-url_base-lib + yweb-robot-dbscheeme +) +target_sources(url_udf.global PRIVATE + ${CMAKE_SOURCE_DIR}/yql/udfs/common/url/url_udf.cpp +) diff --git a/yql/udfs/common/url/CMakeLists.txt b/yql/udfs/common/url/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/yql/udfs/common/url/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/yql/udfs/common/url/CMakeLists.windows-x86_64.txt b/yql/udfs/common/url/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..5b6b766df7 --- /dev/null +++ b/yql/udfs/common/url/CMakeLists.windows-x86_64.txt @@ -0,0 +1,42 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(url_udf INTERFACE) +target_link_libraries(url_udf INTERFACE + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + kernel-hosts-owner + kernel-urlnorm + library-cpp-robots_txt + common-url_base-lib + yweb-robot-dbscheeme +) + +add_global_library_for(url_udf.global url_udf) +target_compile_options(url_udf.global PRIVATE + -DUDF_ABI_VERSION_MAJOR=2 + -DUDF_ABI_VERSION_MINOR=33 + -DUDF_ABI_VERSION_PATCH=0 +) +target_link_libraries(url_udf.global PUBLIC + contrib-libs-cxxsupp + yutil + yql-public-udf + public-udf-support + kernel-hosts-owner + kernel-urlnorm + library-cpp-robots_txt + common-url_base-lib + yweb-robot-dbscheeme +) +target_sources(url_udf.global PRIVATE + ${CMAKE_SOURCE_DIR}/yql/udfs/common/url/url_udf.cpp +) diff --git a/yweb/CMakeLists.txt b/yweb/CMakeLists.txt new file mode 100644 index 0000000000..d4925a6659 --- /dev/null +++ b/yweb/CMakeLists.txt @@ -0,0 +1,13 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(config) +add_subdirectory(protos) +add_subdirectory(realtime) +add_subdirectory(robot) +add_subdirectory(urlfilter) diff --git a/yweb/config/CMakeLists.darwin-x86_64.txt b/yweb/config/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..f7c5bf51d3 --- /dev/null +++ b/yweb/config/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,44 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). 
These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(yweb-config) +target_link_libraries(yweb-config PUBLIC + contrib-libs-cxxsupp + yutil + kernel-langregion + kernel-multilanguage_hosts + library-cpp-charset + cpp-deprecated-fgood + cpp-string_utils-url + library-cpp-yconf + yweb-protos-robotzones + tools-enum_parser-enum_serialization_runtime +) +target_sources(yweb-config PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/config/environment.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/hostconfig.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/clustercfg.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/robot_zone.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/search_zone.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/langregion.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/logreader_config.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/domainlevel.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/upload_rule.cpp +) +generate_enum_serilization(yweb-config + ${CMAKE_SOURCE_DIR}/yweb/config/environment.h + INCLUDE_HEADERS + yweb/config/environment.h +) diff --git a/yweb/config/CMakeLists.linux-aarch64.txt b/yweb/config/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..d3ee8e1783 --- /dev/null +++ b/yweb/config/CMakeLists.linux-aarch64.txt @@ -0,0 +1,45 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(yweb-config) +target_link_libraries(yweb-config PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + kernel-langregion + kernel-multilanguage_hosts + library-cpp-charset + cpp-deprecated-fgood + cpp-string_utils-url + library-cpp-yconf + yweb-protos-robotzones + tools-enum_parser-enum_serialization_runtime +) +target_sources(yweb-config PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/config/environment.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/hostconfig.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/clustercfg.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/robot_zone.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/search_zone.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/langregion.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/logreader_config.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/domainlevel.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/upload_rule.cpp +) +generate_enum_serilization(yweb-config + ${CMAKE_SOURCE_DIR}/yweb/config/environment.h + INCLUDE_HEADERS + yweb/config/environment.h +) diff --git a/yweb/config/CMakeLists.linux-x86_64.txt b/yweb/config/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..d3ee8e1783 --- /dev/null +++ b/yweb/config/CMakeLists.linux-x86_64.txt @@ -0,0 +1,45 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(yweb-config) +target_link_libraries(yweb-config PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + kernel-langregion + kernel-multilanguage_hosts + library-cpp-charset + cpp-deprecated-fgood + cpp-string_utils-url + library-cpp-yconf + yweb-protos-robotzones + tools-enum_parser-enum_serialization_runtime +) +target_sources(yweb-config PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/config/environment.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/hostconfig.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/clustercfg.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/robot_zone.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/search_zone.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/langregion.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/logreader_config.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/domainlevel.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/upload_rule.cpp +) +generate_enum_serilization(yweb-config + ${CMAKE_SOURCE_DIR}/yweb/config/environment.h + INCLUDE_HEADERS + yweb/config/environment.h +) diff --git a/yweb/config/CMakeLists.txt b/yweb/config/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/yweb/config/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/yweb/config/CMakeLists.windows-x86_64.txt b/yweb/config/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..f7c5bf51d3 --- /dev/null +++ b/yweb/config/CMakeLists.windows-x86_64.txt @@ -0,0 +1,44 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) + +add_library(yweb-config) +target_link_libraries(yweb-config PUBLIC + contrib-libs-cxxsupp + yutil + kernel-langregion + kernel-multilanguage_hosts + library-cpp-charset + cpp-deprecated-fgood + cpp-string_utils-url + library-cpp-yconf + yweb-protos-robotzones + tools-enum_parser-enum_serialization_runtime +) +target_sources(yweb-config PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/config/environment.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/hostconfig.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/clustercfg.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/robot_zone.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/search_zone.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/langregion.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/logreader_config.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/domainlevel.cpp + ${CMAKE_SOURCE_DIR}/yweb/config/upload_rule.cpp +) +generate_enum_serilization(yweb-config + ${CMAKE_SOURCE_DIR}/yweb/config/environment.h + INCLUDE_HEADERS + yweb/config/environment.h +) diff --git a/yweb/protos/CMakeLists.darwin-x86_64.txt b/yweb/protos/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..b50e5a988f --- /dev/null +++ b/yweb/protos/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,387 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(robotzones) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + 
contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + 
TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + 
TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yweb-protos) +target_link_libraries(yweb-protos PUBLIC + contrib-libs-cxxsupp + yutil + kernel-mango-proto + kernel-search_zone-protos + yweb-realtime-protos + lib-indexannportion-input + zora-proto-common + contrib-libs-protobuf +) +target_proto_messages(yweb-protos PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/protos/anti_export.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/aura.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/docfactors.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/export.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/geo.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/hostfactors.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/indexdata.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/indexeddoc.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/links.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/maskswithint.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/metric.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/metaquery_lang.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/navsource.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/navsource_strict.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/orange.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/oxygen.proto + 
${CMAKE_SOURCE_DIR}/yweb/protos/ownertreeweight.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/ownerbanmasks.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/robot.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/spamchecker.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/spider.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/scdata.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/mragent.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/sr.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/roboturlatrs.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/regwordhost.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/owneraspamdeddata.proto +) +target_proto_addincls(yweb-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yweb-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/protos/CMakeLists.linux-aarch64.txt b/yweb/protos/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..ebf3d0a113 --- /dev/null +++ b/yweb/protos/CMakeLists.linux-aarch64.txt @@ -0,0 +1,388 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(robotzones) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + 
contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + 
TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + 
TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yweb-protos) +target_link_libraries(yweb-protos PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + kernel-mango-proto + kernel-search_zone-protos + yweb-realtime-protos + lib-indexannportion-input + zora-proto-common + contrib-libs-protobuf +) +target_proto_messages(yweb-protos PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/protos/anti_export.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/aura.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/docfactors.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/export.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/geo.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/hostfactors.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/indexdata.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/indexeddoc.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/links.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/maskswithint.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/metric.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/metaquery_lang.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/navsource.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/navsource_strict.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/orange.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/oxygen.proto + 
${CMAKE_SOURCE_DIR}/yweb/protos/ownertreeweight.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/ownerbanmasks.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/robot.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/spamchecker.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/spider.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/scdata.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/mragent.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/sr.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/roboturlatrs.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/regwordhost.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/owneraspamdeddata.proto +) +target_proto_addincls(yweb-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yweb-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/protos/CMakeLists.linux-x86_64.txt b/yweb/protos/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..ebf3d0a113 --- /dev/null +++ b/yweb/protos/CMakeLists.linux-x86_64.txt @@ -0,0 +1,388 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(robotzones) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + 
contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + 
TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + 
TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yweb-protos) +target_link_libraries(yweb-protos PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + kernel-mango-proto + kernel-search_zone-protos + yweb-realtime-protos + lib-indexannportion-input + zora-proto-common + contrib-libs-protobuf +) +target_proto_messages(yweb-protos PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/protos/anti_export.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/aura.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/docfactors.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/export.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/geo.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/hostfactors.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/indexdata.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/indexeddoc.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/links.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/maskswithint.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/metric.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/metaquery_lang.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/navsource.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/navsource_strict.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/orange.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/oxygen.proto + 
${CMAKE_SOURCE_DIR}/yweb/protos/ownertreeweight.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/ownerbanmasks.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/robot.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/spamchecker.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/spider.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/scdata.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/mragent.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/sr.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/roboturlatrs.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/regwordhost.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/owneraspamdeddata.proto +) +target_proto_addincls(yweb-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yweb-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/protos/CMakeLists.txt b/yweb/protos/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/yweb/protos/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/yweb/protos/CMakeLists.windows-x86_64.txt b/yweb/protos/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..b50e5a988f --- /dev/null +++ b/yweb/protos/CMakeLists.windows-x86_64.txt @@ -0,0 +1,387 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(robotzones) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + 
contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + 
TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + 
TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yweb-protos) +target_link_libraries(yweb-protos PUBLIC + contrib-libs-cxxsupp + yutil + kernel-mango-proto + kernel-search_zone-protos + yweb-realtime-protos + lib-indexannportion-input + zora-proto-common + contrib-libs-protobuf +) +target_proto_messages(yweb-protos PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/protos/anti_export.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/aura.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/docfactors.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/export.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/geo.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/hostfactors.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/indexdata.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/indexeddoc.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/links.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/maskswithint.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/metric.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/metaquery_lang.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/navsource.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/navsource_strict.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/orange.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/oxygen.proto + 
${CMAKE_SOURCE_DIR}/yweb/protos/ownertreeweight.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/ownerbanmasks.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/robot.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/spamchecker.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/spider.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/scdata.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/mragent.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/sr.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/roboturlatrs.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/regwordhost.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/owneraspamdeddata.proto +) +target_proto_addincls(yweb-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yweb-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/protos/robotzones/CMakeLists.darwin-x86_64.txt b/yweb/protos/robotzones/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..e2c5740248 --- /dev/null +++ b/yweb/protos/robotzones/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,56 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yweb-protos-robotzones) +target_link_libraries(yweb-protos-robotzones PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(yweb-protos-robotzones PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/robotzone.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/uploadrules.proto +) +target_proto_addincls(yweb-protos-robotzones + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yweb-protos-robotzones + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/protos/robotzones/CMakeLists.linux-aarch64.txt b/yweb/protos/robotzones/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..0b52e1fdb2 --- /dev/null +++ b/yweb/protos/robotzones/CMakeLists.linux-aarch64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yweb-protos-robotzones) +target_link_libraries(yweb-protos-robotzones PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(yweb-protos-robotzones PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/robotzone.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/uploadrules.proto +) +target_proto_addincls(yweb-protos-robotzones + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yweb-protos-robotzones + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/protos/robotzones/CMakeLists.linux-x86_64.txt b/yweb/protos/robotzones/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..0b52e1fdb2 --- /dev/null +++ b/yweb/protos/robotzones/CMakeLists.linux-x86_64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yweb-protos-robotzones) +target_link_libraries(yweb-protos-robotzones PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(yweb-protos-robotzones PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/robotzone.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/uploadrules.proto +) +target_proto_addincls(yweb-protos-robotzones + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yweb-protos-robotzones + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/protos/robotzones/CMakeLists.txt b/yweb/protos/robotzones/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/yweb/protos/robotzones/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/yweb/protos/robotzones/CMakeLists.windows-x86_64.txt b/yweb/protos/robotzones/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..e2c5740248 --- /dev/null +++ b/yweb/protos/robotzones/CMakeLists.windows-x86_64.txt @@ -0,0 +1,56 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yweb-protos-robotzones) +target_link_libraries(yweb-protos-robotzones PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(yweb-protos-robotzones PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/robotzone.proto + ${CMAKE_SOURCE_DIR}/yweb/protos/robotzones/uploadrules.proto +) +target_proto_addincls(yweb-protos-robotzones + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yweb-protos-robotzones + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/realtime/CMakeLists.txt b/yweb/realtime/CMakeLists.txt new file mode 100644 index 0000000000..6d580ae9ad --- /dev/null +++ b/yweb/realtime/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(protos) diff --git a/yweb/realtime/protos/CMakeLists.darwin-x86_64.txt b/yweb/realtime/protos/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..6444171984 --- /dev/null +++ b/yweb/realtime/protos/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,56 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yweb-realtime-protos) +target_link_libraries(yweb-realtime-protos PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(yweb-realtime-protos PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/distributor_dump.proto + ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/option.proto +) +target_proto_addincls(yweb-realtime-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yweb-realtime-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/realtime/protos/CMakeLists.linux-aarch64.txt b/yweb/realtime/protos/CMakeLists.linux-aarch64.txt new 
file mode 100644 index 0000000000..9a3a36a261 --- /dev/null +++ b/yweb/realtime/protos/CMakeLists.linux-aarch64.txt @@ -0,0 +1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yweb-realtime-protos) +target_link_libraries(yweb-realtime-protos PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(yweb-realtime-protos PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/distributor_dump.proto + ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/option.proto +) +target_proto_addincls(yweb-realtime-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yweb-realtime-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/realtime/protos/CMakeLists.linux-x86_64.txt b/yweb/realtime/protos/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..9a3a36a261 --- /dev/null +++ b/yweb/realtime/protos/CMakeLists.linux-x86_64.txt @@ -0,0 
+1,57 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yweb-realtime-protos) +target_link_libraries(yweb-realtime-protos PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(yweb-realtime-protos PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/distributor_dump.proto + ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/option.proto +) +target_proto_addincls(yweb-realtime-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yweb-realtime-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/realtime/protos/CMakeLists.txt b/yweb/realtime/protos/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/yweb/realtime/protos/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/yweb/realtime/protos/CMakeLists.windows-x86_64.txt b/yweb/realtime/protos/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..6444171984 --- /dev/null +++ b/yweb/realtime/protos/CMakeLists.windows-x86_64.txt @@ -0,0 +1,56 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(yweb-realtime-protos) +target_link_libraries(yweb-realtime-protos PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(yweb-realtime-protos PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/distributor_dump.proto + ${CMAKE_SOURCE_DIR}/yweb/realtime/protos/option.proto +) +target_proto_addincls(yweb-realtime-protos + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(yweb-realtime-protos + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/robot/CMakeLists.txt b/yweb/robot/CMakeLists.txt new file mode 100644 index 0000000000..9b430d841e --- /dev/null +++ b/yweb/robot/CMakeLists.txt @@ -0,0 +1,10 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(dbscheeme) +add_subdirectory(kiwi_queries) diff --git a/yweb/robot/dbscheeme/CMakeLists.darwin-x86_64.txt b/yweb/robot/dbscheeme/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..c06e393a77 --- /dev/null +++ b/yweb/robot/dbscheeme/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,32 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +find_package(ZLIB REQUIRED) + +add_library(yweb-robot-dbscheeme) +target_link_libraries(yweb-robot-dbscheeme PUBLIC + contrib-libs-cxxsupp + yutil + ZLIB::ZLIB + kernel-hosts-owner + kernel-langregion + kernel-urlnorm + library-cpp-charset + cpp-deprecated-autoarray + cpp-digest-old_crc + library-cpp-microbdb + cpp-mime-types + library-cpp-robots_txt + yweb-config + yweb-protos + yweb-urlfilter +) +target_sources(yweb-robot-dbscheeme PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/extrecords.cpp + ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/lib.cpp +) diff --git a/yweb/robot/dbscheeme/CMakeLists.linux-aarch64.txt b/yweb/robot/dbscheeme/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..04638ac386 --- /dev/null +++ b/yweb/robot/dbscheeme/CMakeLists.linux-aarch64.txt @@ -0,0 +1,33 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +find_package(ZLIB REQUIRED) + +add_library(yweb-robot-dbscheeme) +target_link_libraries(yweb-robot-dbscheeme PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + ZLIB::ZLIB + kernel-hosts-owner + kernel-langregion + kernel-urlnorm + library-cpp-charset + cpp-deprecated-autoarray + cpp-digest-old_crc + library-cpp-microbdb + cpp-mime-types + library-cpp-robots_txt + yweb-config + yweb-protos + yweb-urlfilter +) +target_sources(yweb-robot-dbscheeme PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/extrecords.cpp + ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/lib.cpp +) diff --git a/yweb/robot/dbscheeme/CMakeLists.linux-x86_64.txt b/yweb/robot/dbscheeme/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..04638ac386 --- /dev/null +++ b/yweb/robot/dbscheeme/CMakeLists.linux-x86_64.txt @@ -0,0 +1,33 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +find_package(ZLIB REQUIRED) + +add_library(yweb-robot-dbscheeme) +target_link_libraries(yweb-robot-dbscheeme PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + ZLIB::ZLIB + kernel-hosts-owner + kernel-langregion + kernel-urlnorm + library-cpp-charset + cpp-deprecated-autoarray + cpp-digest-old_crc + library-cpp-microbdb + cpp-mime-types + library-cpp-robots_txt + yweb-config + yweb-protos + yweb-urlfilter +) +target_sources(yweb-robot-dbscheeme PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/extrecords.cpp + ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/lib.cpp +) diff --git a/yweb/robot/dbscheeme/CMakeLists.txt b/yweb/robot/dbscheeme/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/yweb/robot/dbscheeme/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/yweb/robot/dbscheeme/CMakeLists.windows-x86_64.txt b/yweb/robot/dbscheeme/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..c06e393a77 --- /dev/null +++ b/yweb/robot/dbscheeme/CMakeLists.windows-x86_64.txt @@ -0,0 +1,32 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +find_package(ZLIB REQUIRED) + +add_library(yweb-robot-dbscheeme) +target_link_libraries(yweb-robot-dbscheeme PUBLIC + contrib-libs-cxxsupp + yutil + ZLIB::ZLIB + kernel-hosts-owner + kernel-langregion + kernel-urlnorm + library-cpp-charset + cpp-deprecated-autoarray + cpp-digest-old_crc + library-cpp-microbdb + cpp-mime-types + library-cpp-robots_txt + yweb-config + yweb-protos + yweb-urlfilter +) +target_sources(yweb-robot-dbscheeme PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/extrecords.cpp + ${CMAKE_SOURCE_DIR}/yweb/robot/dbscheeme/lib.cpp +) diff --git a/yweb/robot/kiwi_queries/CMakeLists.txt b/yweb/robot/kiwi_queries/CMakeLists.txt new file mode 100644 index 0000000000..82d0377971 --- /dev/null +++ b/yweb/robot/kiwi_queries/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(others) diff --git a/yweb/robot/kiwi_queries/others/CMakeLists.txt b/yweb/robot/kiwi_queries/others/CMakeLists.txt new file mode 100644 index 0000000000..ea6c677000 --- /dev/null +++ b/yweb/robot/kiwi_queries/others/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +add_subdirectory(lib) diff --git a/yweb/robot/kiwi_queries/others/lib/CMakeLists.txt b/yweb/robot/kiwi_queries/others/lib/CMakeLists.txt new file mode 100644 index 0000000000..d300d6d8a9 --- /dev/null +++ b/yweb/robot/kiwi_queries/others/lib/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(indexannportion) diff --git a/yweb/robot/kiwi_queries/others/lib/indexannportion/CMakeLists.txt b/yweb/robot/kiwi_queries/others/lib/indexannportion/CMakeLists.txt new file mode 100644 index 0000000000..1600e018b8 --- /dev/null +++ b/yweb/robot/kiwi_queries/others/lib/indexannportion/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(input) diff --git a/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.darwin-x86_64.txt b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..4919e5c2a0 --- /dev/null +++ b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,43 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(lib-indexannportion-input) +target_link_libraries(lib-indexannportion-input PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(lib-indexannportion-input PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/robot/kiwi_queries/others/lib/indexannportion/input/annotations.proto +) +target_proto_addincls(lib-indexannportion-input + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(lib-indexannportion-input + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.linux-aarch64.txt b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..e25b79e730 --- /dev/null +++ b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.linux-aarch64.txt @@ -0,0 +1,44 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. 
Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(lib-indexannportion-input) +target_link_libraries(lib-indexannportion-input PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(lib-indexannportion-input PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/robot/kiwi_queries/others/lib/indexannportion/input/annotations.proto +) +target_proto_addincls(lib-indexannportion-input + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(lib-indexannportion-input + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.linux-x86_64.txt b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..e25b79e730 --- /dev/null +++ b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.linux-x86_64.txt @@ -0,0 +1,44 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(lib-indexannportion-input) +target_link_libraries(lib-indexannportion-input PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(lib-indexannportion-input PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/robot/kiwi_queries/others/lib/indexannportion/input/annotations.proto +) +target_proto_addincls(lib-indexannportion-input + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(lib-indexannportion-input + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.txt b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.windows-x86_64.txt b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..4919e5c2a0 --- /dev/null +++ b/yweb/robot/kiwi_queries/others/lib/indexannportion/input/CMakeLists.windows-x86_64.txt @@ -0,0 +1,43 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(lib-indexannportion-input) +target_link_libraries(lib-indexannportion-input PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(lib-indexannportion-input PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/robot/kiwi_queries/others/lib/indexannportion/input/annotations.proto +) +target_proto_addincls(lib-indexannportion-input + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(lib-indexannportion-input + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/yweb/urlfilter/CMakeLists.darwin-x86_64.txt b/yweb/urlfilter/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..fa1d7a24b2 --- /dev/null +++ b/yweb/urlfilter/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,27 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(yweb-urlfilter) +target_link_libraries(yweb-urlfilter PUBLIC + contrib-libs-cxxsupp + yutil + cpp-deprecated-autoarray + cpp-deprecated-datafile + cpp-deprecated-fgood + cpp-regex-glob + cpp-regex-pcre + cpp-string_utils-url + library-cpp-uri +) +target_sources(yweb-urlfilter PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/hide.cpp + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter.cpp + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/findboard.cpp + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter_builder.cpp +) diff --git a/yweb/urlfilter/CMakeLists.linux-aarch64.txt b/yweb/urlfilter/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..2466f7e39a --- /dev/null +++ b/yweb/urlfilter/CMakeLists.linux-aarch64.txt @@ -0,0 +1,28 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(yweb-urlfilter) +target_link_libraries(yweb-urlfilter PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-deprecated-autoarray + cpp-deprecated-datafile + cpp-deprecated-fgood + cpp-regex-glob + cpp-regex-pcre + cpp-string_utils-url + library-cpp-uri +) +target_sources(yweb-urlfilter PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/hide.cpp + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter.cpp + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/findboard.cpp + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter_builder.cpp +) diff --git a/yweb/urlfilter/CMakeLists.linux-x86_64.txt b/yweb/urlfilter/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..2466f7e39a --- /dev/null +++ b/yweb/urlfilter/CMakeLists.linux-x86_64.txt @@ -0,0 +1,28 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + + +add_library(yweb-urlfilter) +target_link_libraries(yweb-urlfilter PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-deprecated-autoarray + cpp-deprecated-datafile + cpp-deprecated-fgood + cpp-regex-glob + cpp-regex-pcre + cpp-string_utils-url + library-cpp-uri +) +target_sources(yweb-urlfilter PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/hide.cpp + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter.cpp + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/findboard.cpp + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter_builder.cpp +) diff --git a/yweb/urlfilter/CMakeLists.txt b/yweb/urlfilter/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/yweb/urlfilter/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/yweb/urlfilter/CMakeLists.windows-x86_64.txt b/yweb/urlfilter/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..fa1d7a24b2 --- /dev/null +++ b/yweb/urlfilter/CMakeLists.windows-x86_64.txt @@ -0,0 +1,27 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(yweb-urlfilter) +target_link_libraries(yweb-urlfilter PUBLIC + contrib-libs-cxxsupp + yutil + cpp-deprecated-autoarray + cpp-deprecated-datafile + cpp-deprecated-fgood + cpp-regex-glob + cpp-regex-pcre + cpp-string_utils-url + library-cpp-uri +) +target_sources(yweb-urlfilter PRIVATE + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/hide.cpp + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter.cpp + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/findboard.cpp + ${CMAKE_SOURCE_DIR}/yweb/urlfilter/urlfilter_builder.cpp +) diff --git a/zora/CMakeLists.txt b/zora/CMakeLists.txt new file mode 100644 index 0000000000..6da313a87f --- /dev/null +++ b/zora/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(zora) diff --git a/zora/zora/CMakeLists.txt b/zora/zora/CMakeLists.txt new file mode 100644 index 0000000000..499930c4b0 --- /dev/null +++ b/zora/zora/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(proto) diff --git a/zora/zora/proto/CMakeLists.txt b/zora/zora/proto/CMakeLists.txt new file mode 100644 index 0000000000..867161a12c --- /dev/null +++ b/zora/zora/proto/CMakeLists.txt @@ -0,0 +1,9 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(common) diff --git a/zora/zora/proto/common/CMakeLists.darwin-x86_64.txt b/zora/zora/proto/common/CMakeLists.darwin-x86_64.txt new file mode 100644 index 0000000000..c7ebbe0c2b --- /dev/null +++ b/zora/zora/proto/common/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,43 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. 
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(zora-proto-common) +target_link_libraries(zora-proto-common PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(zora-proto-common PRIVATE + ${CMAKE_SOURCE_DIR}/zora/zora/proto/common/common.proto +) +target_proto_addincls(zora-proto-common + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(zora-proto-common + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/zora/zora/proto/common/CMakeLists.linux-aarch64.txt b/zora/zora/proto/common/CMakeLists.linux-aarch64.txt new file mode 100644 index 0000000000..8144cc9077 --- /dev/null +++ b/zora/zora/proto/common/CMakeLists.linux-aarch64.txt @@ -0,0 +1,44 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(zora-proto-common) +target_link_libraries(zora-proto-common PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(zora-proto-common PRIVATE + ${CMAKE_SOURCE_DIR}/zora/zora/proto/common/common.proto +) +target_proto_addincls(zora-proto-common + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(zora-proto-common + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/zora/zora/proto/common/CMakeLists.linux-x86_64.txt b/zora/zora/proto/common/CMakeLists.linux-x86_64.txt new file mode 100644 index 0000000000..8144cc9077 --- /dev/null +++ b/zora/zora/proto/common/CMakeLists.linux-x86_64.txt @@ -0,0 +1,44 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(zora-proto-common) +target_link_libraries(zora-proto-common PUBLIC + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(zora-proto-common PRIVATE + ${CMAKE_SOURCE_DIR}/zora/zora/proto/common/common.proto +) +target_proto_addincls(zora-proto-common + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(zora-proto-common + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) diff --git a/zora/zora/proto/common/CMakeLists.txt b/zora/zora/proto/common/CMakeLists.txt new file mode 100644 index 0000000000..f8b31df0c1 --- /dev/null +++ b/zora/zora/proto/common/CMakeLists.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/zora/zora/proto/common/CMakeLists.windows-x86_64.txt b/zora/zora/proto/common/CMakeLists.windows-x86_64.txt new file mode 100644 index 0000000000..c7ebbe0c2b --- /dev/null +++ b/zora/zora/proto/common/CMakeLists.windows-x86_64.txt @@ -0,0 +1,43 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. 
+ + +get_built_tool_path( + TOOL_protoc_bin + TOOL_protoc_dependency + contrib/tools/protoc/bin + protoc +) +get_built_tool_path( + TOOL_cpp_styleguide_bin + TOOL_cpp_styleguide_dependency + contrib/tools/protoc/plugins/cpp_styleguide + cpp_styleguide +) + +add_library(zora-proto-common) +target_link_libraries(zora-proto-common PUBLIC + contrib-libs-cxxsupp + yutil + contrib-libs-protobuf +) +target_proto_messages(zora-proto-common PRIVATE + ${CMAKE_SOURCE_DIR}/zora/zora/proto/common/common.proto +) +target_proto_addincls(zora-proto-common + ./ + ${CMAKE_SOURCE_DIR}/ + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src + ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/contrib/libs/protobuf/src +) +target_proto_outs(zora-proto-common + --cpp_out=${CMAKE_BINARY_DIR}/ + --cpp_styleguide_out=${CMAKE_BINARY_DIR}/ +) |