aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/fieldcalc
diff options
context:
space:
mode:
authorrobot-piglet <robot-piglet@yandex-team.com>2023-12-02 01:45:21 +0300
committerrobot-piglet <robot-piglet@yandex-team.com>2023-12-02 02:42:50 +0300
commit9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c (patch)
tree9f88a486917d371d099cd712efd91b4c122d209d /library/cpp/fieldcalc
parent32fb6dda1feb24f9ab69ece5df0cb9ec238ca5e6 (diff)
downloadydb-9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c.tar.gz
Intermediate changes
Diffstat (limited to 'library/cpp/fieldcalc')
-rw-r--r--library/cpp/fieldcalc/field_calc.cpp1136
-rw-r--r--library/cpp/fieldcalc/field_calc.h136
-rw-r--r--library/cpp/fieldcalc/field_calc_int.h593
-rw-r--r--library/cpp/fieldcalc/lossy_types.h52
-rw-r--r--library/cpp/fieldcalc/ya.make13
5 files changed, 1930 insertions, 0 deletions
diff --git a/library/cpp/fieldcalc/field_calc.cpp b/library/cpp/fieldcalc/field_calc.cpp
new file mode 100644
index 0000000000..1066b5b5e6
--- /dev/null
+++ b/library/cpp/fieldcalc/field_calc.cpp
@@ -0,0 +1,1136 @@
+#include <cstdio>
+
+#include <util/str_stl.h>
+#include <util/string/subst.h>
+#include <util/string/util.h>
+#include <util/string/cast.h>
+#include <util/stream/printf.h>
+
+#include "field_calc_int.h"
+
+using namespace std;
+
+enum Operators { // Operator codes returned by get_oper(); stored in the char fields calc_elem::oper and calc_op::Oper
+ OP_ADD, // `+`
+ OP_SUBSTRACT, // `-` when the parser does not expect a unary operator
+ OP_MULTIPLY, // `*`
+ OP_DIVIDE, // `/`
+ OP_MODULUS, // `%`
+ OP_REGEXP, // `=~` (recognized by get_oper, but the calc_op constructor always throws for it)
+ OP_REGEXP_NOT, // `!~` (same restriction as OP_REGEXP)
+ OP_LEFT_SHIFT, // `<<`
+ OP_RIGHT_SHIFT, // `>>`
+ OP_EQUAL, // `==`
+ OP_NOT_EQUAL, // `!=`
+ OP_LESS, // `<`
+ OP_LESS_OR_EQUAL, // `<=`
+ OP_GREATER, // `>`
+ OP_GREATER_OR_EQUAL, // `>=`
+ OP_XOR, // `^`
+ OP_BITWISE_OR, // `|`
+ OP_BITWISE_AND, // `&`
+ OP_LOGICAL_OR, // `||`
+ OP_LOGICAL_AND, // `&&`
+ OP_UNARY_NOT, // `!` not followed by `=` or `~`
+ OP_UNARY_COMPLEMENT, // `~`
+ OP_UNARY_MINUS, // `-` when the parser expects a unary operator
+ OP_LOG, // `#LOG#`
+ OP_LOG10, // `#LOG10#`
+ OP_ROUND, // `#ROUND#`
+ OP_ASSIGN, // single `=`; emit_op only allows it for local variables
+ OP_QUESTION, // `?`
+ OP_COLON, // `:`
+
+ OP_UNKNOWN, // nothing recognized; get_op_prio() returns 0 for it
+};
+
+struct calc_op;
+
+struct calc_elem { // One slot of the operand stack used by TFieldCalculatorBase::Compile()
+ dump_item item; // the operand stored in this slot
+ char oper; // Operators code parsed just before this operand (OP_UNKNOWN for the first slot)
+ int op_prio; // priority of oper as computed in Compile(): get_op_prio() plus bracket-depth and unary-sequence offsets
+};
+
+// A single compiled operation of the calculator: `Left Oper Right`.
+// The constructor classifies the operator once, so that evaluation only has to
+// consult a few flags. The outcome of the last evaluation is kept in `result`.
+struct calc_op {
+    dump_item Left, Right;
+    char Oper;
+    bool force_long;  // operands are converted to long before the operation
+    bool unary;       // only Right is used, Left is ignored
+    bool is_variable; // this op is the value of a local variable (evaluated once, then cached)
+    bool string_op; // TODO -> bitop
+
+    // for local vars
+    mutable bool calculated;
+    mutable eval_res_type result;
+
+    calc_op(calc_elem& left, calc_elem& right)
+        : Left(left.item)
+        , Right(right.item)
+        , Oper(right.oper)
+        , is_variable(false)
+        , calculated(false)
+        , result(false)
+    {
+        // Bitwise, logical, shift and modulus operators work on long operands only.
+        switch (Oper) {
+            case OP_XOR:
+            case OP_BITWISE_OR:
+            case OP_BITWISE_AND:
+            case OP_LOGICAL_OR:
+            case OP_LOGICAL_AND:
+            case OP_UNARY_NOT:
+            case OP_UNARY_COMPLEMENT:
+            case OP_LEFT_SHIFT:
+            case OP_RIGHT_SHIFT:
+            case OP_MODULUS:
+                force_long = true;
+                break;
+            default:
+                force_long = false;
+                break;
+        }
+
+        switch (Oper) {
+            case OP_UNARY_NOT:
+            case OP_UNARY_COMPLEMENT:
+            case OP_UNARY_MINUS:
+            case OP_LOG:
+            case OP_LOG10:
+            case OP_ROUND:
+                unary = true;
+                break;
+            default:
+                unary = false;
+                break;
+        }
+
+        // String mode applies to regexp matching and comparisons between two string operands.
+        const bool both_strings = IsStringType(Left.type) && IsStringType(Right.type);
+        bool string_capable;
+        switch (Oper) {
+            case OP_REGEXP:
+            case OP_REGEXP_NOT:
+            case OP_EQUAL:
+            case OP_NOT_EQUAL:
+            case OP_LESS:
+            case OP_LESS_OR_EQUAL:
+            case OP_GREATER:
+            case OP_GREATER_OR_EQUAL:
+                string_capable = true;
+                break;
+            default:
+                string_capable = false;
+                break;
+        }
+        string_op = both_strings && string_capable;
+
+        // Regexps are parsed but never accepted: both paths below throw.
+        const bool is_regexp = Oper == OP_REGEXP || Oper == OP_REGEXP_NOT;
+        if (is_regexp) {
+            if (!string_op)
+                ythrow yexception() << "calc-expr: regexp requested for non-strings";
+            ythrow yexception() << "calc-expr: regexps currently not supported";
+        }
+    }
+
+    // Evaluates the operation into `result`. An op that backs a local variable
+    // is evaluated only while `calculated` is false.
+    Y_FORCE_INLINE void eval(const char** dd) const {
+        if (is_variable && calculated)
+            return;
+        do_eval(dd);
+        if (is_variable)
+            calculated = true;
+    }
+
+private:
+    Y_FORCE_INLINE void do_eval(const char** dd) const;
+};
+
+// Computes this operation for the records in `dd` and stores the outcome in `result`.
+//
+//  * `?` yields the right operand when the left one (converted to long) is non-zero,
+//    and a null result otherwise; `:` passes a non-null left operand through and
+//    falls back to the right operand when the left one is null.
+//  * In string mode (string_op) both operands are fetched as TStringBuf and compared.
+//  * Otherwise both operands are evaluated numerically; operators flagged force_long
+//    see their operands converted to long first.
+// For unary operators Left is not evaluated; only Right is used.
+void calc_op::do_eval(const char** dd) const {
+    eval_res_type left1 = unary ? (eval_res_type) false : Left.eval(dd);
+    if (Oper == OP_QUESTION) {
+        left1.to_long();
+        if (left1.res_long) {
+            result = Right.eval(dd);
+        } else {
+            result = eval_res_type(); // null
+        }
+        return;
+    } else if (Oper == OP_COLON) {
+        if (left1.is_null()) {
+            result = Right.eval(dd);
+        } else {
+            result = left1;
+        }
+        return;
+    }
+
+    if (Y_UNLIKELY(string_op)) {
+        TStringBuf left2 = Left.GetStrBuf(dd);
+        TStringBuf right2 = Right.GetStrBuf(dd);
+        switch (Oper) {
+            case OP_REGEXP:
+                result = false;
+                break;
+            case OP_REGEXP_NOT:
+                result = false;
+                break;
+            case OP_EQUAL:
+                result = left2 == right2;
+                break;
+            case OP_NOT_EQUAL:
+                result = left2 != right2;
+                break;
+            case OP_LESS:
+                result = left2 < right2;
+                break;
+            case OP_LESS_OR_EQUAL:
+                result = left2 <= right2;
+                break;
+            case OP_GREATER:
+                result = left2 > right2;
+                break;
+            case OP_GREATER_OR_EQUAL:
+                result = left2 >= right2;
+                break;
+            default:
+                assert(false);
+        }
+        return;
+    }
+
+    eval_res_type right1 = Right.eval(dd);
+    if (force_long) { // logical ops will be all long
+        left1.to_long();
+        right1.to_long();
+    }
+    switch (Oper) {
+        case OP_ADD:
+            result = left1 + right1;
+            break;
+        case OP_SUBSTRACT:
+            result = left1 - right1;
+            break;
+        case OP_MULTIPLY:
+            result = left1 * right1;
+            break;
+        case OP_DIVIDE:
+            result = left1 / right1;
+            break;
+        case OP_MODULUS:
+            // Guard the divisor, not the dividend: `x % 0` is undefined behaviour,
+            // while `0 % y` is simply 0. A zero divisor yields 0.
+            result = right1.res_long ? left1.res_long % right1.res_long : 0;
+            break;
+        case OP_LEFT_SHIFT:
+            result = left1.res_long << right1.res_long;
+            break;
+        case OP_RIGHT_SHIFT:
+            result = left1.res_long >> right1.res_long;
+            break;
+        case OP_EQUAL:
+            result = left1 == right1;
+            break;
+        case OP_NOT_EQUAL:
+            result = !(left1 == right1);
+            break;
+        case OP_LESS:
+            result = left1 < right1;
+            break;
+        case OP_LESS_OR_EQUAL:
+            result = !(right1 < left1);
+            break; // <=
+        case OP_GREATER:
+            result = right1 < left1;
+            break;
+        case OP_GREATER_OR_EQUAL:
+            result = !(left1 < right1);
+            break; // >=
+        case OP_XOR:
+            result = left1.res_long ^ right1.res_long;
+            break;
+        case OP_BITWISE_OR:
+            result = left1.res_long | right1.res_long;
+            break;
+        case OP_BITWISE_AND:
+            result = left1.res_long & right1.res_long;
+            break;
+        case OP_LOGICAL_OR:
+            result = left1.res_long || right1.res_long;
+            break;
+        case OP_LOGICAL_AND:
+            result = left1.res_long && right1.res_long;
+            break;
+        case OP_UNARY_NOT:
+            result = !right1.res_long;
+            break;
+        case OP_UNARY_COMPLEMENT:
+            result = ~right1.res_long;
+            break;
+        case OP_UNARY_MINUS:
+            result = Minus(right1);
+            break;
+        case OP_LOG:
+            result = Log(right1);
+            break;
+        case OP_LOG10:
+            result = Log10(right1);
+            break;
+        case OP_ROUND:
+            result = Round(right1);
+            break;
+        default:
+            assert(false);
+    }
+}
+
+namespace {
+    // Overloads that let dump_item::print() use the same fprintf(...) spelling
+    // for TString and IOutputStream targets as for FILE*.
+    // Copy-paste of fcat(TString): it should not be too slow, yet it must not
+    // slow down the main functionality, libc fprintf, even a little.
+
+    // Appends the formatted text to *s; returns what Printf() reports.
+    size_t Y_PRINTF_FORMAT(2, 3) fprintf(TString* s, const char* c, ...) {
+        va_list args;
+        va_start(args, c);
+        TStringOutput sink(*s);
+        const size_t written = Printf(sink, c, args);
+        va_end(args);
+        return written;
+    }
+
+    // Writes the formatted text to *s; returns what Printf() reports.
+    size_t Y_PRINTF_FORMAT(2, 3) fprintf(IOutputStream* s, const char* c, ...) {
+        va_list args;
+        va_start(args, c);
+        IOutputStream& sink = *s;
+        const size_t written = Printf(sink, c, args);
+        va_end(args);
+        return written;
+    }
+}
+
+template <class TOut>
+void dump_item::print(TOut* p, const char** dd) const { // Writes this item's value for the record dd[pack_id] to p using fprintf-style formatting; instantiated for FILE, TString and IOutputStream
+ const char* d = dd[pack_id]; // base address of the record this item belongs to
+ const fake* f = reinterpret_cast<const fake*>(d); // same address, viewed as the object on which member-function items are invoked
+
+ switch (type) {
+ case DIT_FAKE_ITEM:
+ assert(false);
+ break;
+ case DIT_MATH_RESULT:
+ assert(false);
+ break; // must call eval instead
+ case DIT_NAME:
+ assert(false);
+ break; // no op
+
+ case DIT_BOOL_FIELD: // plain fields are read at d + field_offset
+ fprintf(p, *(bool*)(d + field_offset) ? "true" : "false");
+ break;
+ case DIT_UI8_FIELD:
+ fprintf(p, "%u", *(ui8*)(d + field_offset));
+ break;
+ case DIT_UI16_FIELD:
+ fprintf(p, "%u", *(ui16*)(d + field_offset));
+ break;
+ case DIT_UI32_FIELD:
+ fprintf(p, "%u", *(ui32*)(d + field_offset));
+ break;
+ case DIT_I64_FIELD:
+ fprintf(p, "%" PRId64, *(i64*)(d + field_offset));
+ break;
+ case DIT_UI64_FIELD:
+ fprintf(p, "%" PRIu64, *(ui64*)(d + field_offset));
+ break;
+ case DIT_FLOAT_FIELD:
+ fprintf(p, "%.4f", *(float*)(d + field_offset)); // floats are printed with 4 decimals
+ break;
+ case DIT_DOUBLE_FIELD:
+ fprintf(p, "%.7f", *(double*)(d + field_offset)); // doubles are printed with 7 decimals
+ break;
+ case DIT_TIME_T32_FIELD:
+ fprintf(p, "%ld", (long)*(time_t32*)(d + field_offset));
+ break;
+ case DIT_PF16UI32_FIELD:
+ fprintf(p, "%u", (ui32) * (pf16ui32*)(d + field_offset));
+ break;
+ case DIT_PF16FLOAT_FIELD:
+ fprintf(p, "%.4f", (float)*(pf16float*)(d + field_offset));
+ break;
+ case DIT_SF16FLOAT_FIELD:
+ fprintf(p, "%.4f", (float)*(sf16float*)(d + field_offset));
+ break;
+ case DIT_STRING_FIELD:
+ fprintf(p, "%s", (d + field_offset)); // the field is printed as a NUL-terminated string stored inline in the record
+ break;
+
+ case DIT_LONG_CONST: // constants come from the item itself, not from the record
+ fprintf(p, "%ld", long_const);
+ break;
+ case DIT_FLOAT_CONST:
+ fprintf(p, "%.4f", float_const);
+ break;
+ case DIT_STR_CONST:
+ fprintf(p, "%.*s", (int)the_buf.size(), the_buf.data());
+ break;
+
+ case DIT_INT_FUNCTION: // member functions are invoked on f
+ fprintf(p, "%d", (f->*int_fn)());
+ break;
+ case DIT_FLOAT_FUNCTION:
+ fprintf(p, "%.4f", (f->*float_fn)());
+ break;
+ case DIT_BOOL_FUNCTION:
+ fprintf(p, "%d", (f->*bool_fn)());
+ break;
+ case DIT_STR_FUNCTION:
+ fprintf(p, "%s", (f->*str_fn)());
+ break;
+ case DIT_STRBUF_FUNCTION:
+ the_buf.clear(); // the_buf is handed to the function as a buffer argument, so it is emptied first
+ fprintf(p, "%s", (f->*strbuf_2_fn)(the_buf, nullptr));
+ break;
+
+ case DIT_UI8_EXT_FUNCTION: // external functions receive f as their argument
+ fprintf(p, "%u", (*ui8_ext_fn)(f));
+ break;
+ case DIT_UI16_EXT_FUNCTION:
+ fprintf(p, "%u", (*ui16_ext_fn)(f));
+ break;
+ case DIT_UI32_EXT_FUNCTION:
+ fprintf(p, "%u", (*ui32_ext_fn)(f));
+ break;
+ case DIT_UI64_EXT_FUNCTION:
+ fprintf(p, "%" PRIu64, (*ui64_ext_fn)(f));
+ break;
+
+ case DIT_UI8_ENUM_EQ: // *_ENUM_EQ prints 1 when the field equals enum_val, otherwise 0
+ fprintf(p, "%d", *(ui8*)(d + field_offset) == enum_val);
+ break;
+ case DIT_UI8_ENUM_SET: // *_ENUM_SET prints 1 when the field shares any bit with enum_val, otherwise 0
+ fprintf(p, "%d", !!(*(ui8*)(d + field_offset) & enum_val));
+ break;
+
+ case DIT_UI16_ENUM_EQ:
+ fprintf(p, "%d", *(ui16*)(d + field_offset) == enum_val);
+ break;
+ case DIT_UI16_ENUM_SET:
+ fprintf(p, "%d", !!(*(ui16*)(d + field_offset) & enum_val));
+ break;
+
+ case DIT_UI32_ENUM_EQ:
+ fprintf(p, "%d", *(ui32*)(d + field_offset) == enum_val);
+ break;
+ case DIT_UI32_ENUM_SET:
+ fprintf(p, "%d", !!(*(ui32*)(d + field_offset) & enum_val));
+ break;
+
+ case DIT_INT_ENUM_FUNCTION_EQ:
+ fprintf(p, "%d", (ui32)(f->*int_enum_fn)() == enum_val);
+ break;
+ case DIT_INT_ENUM_FUNCTION_SET:
+ fprintf(p, "%d", !!(ui32)((f->*int_enum_fn)() & enum_val));
+ break;
+
+ case DIT_BOOL_FUNC_FIXED_STR: // *_FUNC_FIXED_STR items call a member function with the_buf as its argument
+ fprintf(p, "%u", (ui32)(f->*bool_strbuf_fn)(the_buf));
+ break;
+ case DIT_UI8_FUNC_FIXED_STR:
+ fprintf(p, "%u", (ui32)(f->*ui8_strbuf_fn)(the_buf));
+ break;
+ case DIT_UI16_FUNC_FIXED_STR:
+ fprintf(p, "%u", (ui32)(f->*ui16_strbuf_fn)(the_buf));
+ break;
+ case DIT_UI32_FUNC_FIXED_STR:
+ fprintf(p, "%u", (f->*ui32_strbuf_fn)(the_buf));
+ break;
+ case DIT_I64_FUNC_FIXED_STR:
+ fprintf(p, "%" PRId64, (f->*i64_strbuf_fn)(the_buf));
+ break;
+ case DIT_UI64_FUNC_FIXED_STR:
+ fprintf(p, "%" PRIu64, (f->*ui64_strbuf_fn)(the_buf));
+ break;
+ case DIT_FLOAT_FUNC_FIXED_STR:
+ fprintf(p, "%.4f", (f->*float_strbuf_fn)(the_buf));
+ break;
+ case DIT_DOUBLE_FUNC_FIXED_STR:
+ fprintf(p, "%.7f", (f->*double_strbuf_fn)(the_buf));
+ break;
+
+ case DIT_RESOLVE_BY_NAME:
+ fprintf(p, "%s", (f->*resolve_fn)(the_buf).data()); // NOTE(review): %s needs NUL-terminated data; confirm the value returned by resolve_fn guarantees that
+ break;
+
+ default:
+ assert(false);
+ break;
+ }
+}
+
+// Explicit instantiations for every supported output type, just in case
+template void dump_item::print<FILE>(FILE* p, const char** dd) const;
+template void dump_item::print<TString>(TString* p, const char** dd) const;
+template void dump_item::print<IOutputStream>(IOutputStream* p, const char** dd) const;
+
+// Returns the string value of this item for the record dd[pack_id].
+// Only string-like items are supported; any other type trips the assert and
+// yields an empty TStringBuf.
+TStringBuf dump_item::GetStrBuf(const char** dd) const {
+    const char* record = dd[pack_id];
+    const fake* obj = reinterpret_cast<const fake*>(record);
+
+    if (type == DIT_STRING_FIELD)
+        return record + field_offset;
+
+    if (type == DIT_STR_CONST)
+        return the_buf;
+
+    if (type == DIT_STR_FUNCTION)
+        return (obj->*str_fn)();
+
+    if (type == DIT_STRBUF_FUNCTION) {
+        // the_buf is passed to the function as a buffer, so start from empty
+        the_buf.clear();
+        return (obj->*strbuf_2_fn)(the_buf, nullptr);
+    }
+
+    if (type == DIT_RESOLVE_BY_NAME)
+        return (obj->*resolve_fn)(the_buf);
+
+    assert(false);
+    return TStringBuf();
+}
+
+// recursive
+eval_res_type dump_item::eval(const char** dd) const { // Returns the numeric value of this item for the record dd[pack_id]; recursive through DIT_MATH_RESULT items
+ const char* d = dd[pack_id]; // base address of the record this item belongs to
+ const fake* f = reinterpret_cast<const fake*>(d); // same address, viewed as the object on which member-function items are invoked
+
+ switch (type) {
+ case DIT_FAKE_ITEM:
+ assert(false);
+ return (long int)0;
+ case DIT_MATH_RESULT:
+ this->op->eval(dd); // evaluates the referenced calc_op, which in turn evaluates its operands
+ return this->op->result;
+ case DIT_NAME:
+ assert(false);
+ return (long int)0;
+
+ case DIT_BOOL_FIELD: // plain fields are read at d + field_offset
+ return (ui32) * (bool*)(d + field_offset);
+ case DIT_UI8_FIELD:
+ return (ui32) * (ui8*)(d + field_offset);
+ case DIT_UI16_FIELD:
+ return (ui32) * (ui16*)(d + field_offset);
+ case DIT_UI32_FIELD:
+ return (ui32) * (ui32*)(d + field_offset);
+ case DIT_I64_FIELD:
+ return (long)*(i64*)(d + field_offset); // TODO: 64 bit support in calculator?
+ case DIT_UI64_FIELD:
+ return (long)*(ui64*)(d + field_offset); // TODO: 64 bit support in calculator?
+ case DIT_FLOAT_FIELD:
+ return (float)*(float*)(d + field_offset);
+ case DIT_DOUBLE_FIELD:
+ return *(double*)(d + field_offset);
+ case DIT_TIME_T32_FIELD:
+ return (long)*(time_t32*)(d + field_offset);
+ case DIT_PF16UI32_FIELD:
+ return (ui32) * (pf16ui32*)(d + field_offset);
+ case DIT_PF16FLOAT_FIELD:
+ return (float)*(pf16float*)(d + field_offset);
+ case DIT_SF16FLOAT_FIELD:
+ return (float)*(sf16float*)(d + field_offset);
+ case DIT_STRING_FIELD:
+ return !!d[field_offset]; // we don't have any string functions, just 0 if empty
+
+ case DIT_LONG_CONST: // constants come from the item itself, not from the record
+ return long_const;
+ case DIT_FLOAT_CONST:
+ return float_const;
+ case DIT_STR_CONST:
+ return !!the_buf; // double negation of the stored string constant
+
+ case DIT_INT_FUNCTION: // member functions are invoked on f
+ return (long)(f->*int_fn)();
+ case DIT_FLOAT_FUNCTION:
+ return (float)(f->*float_fn)();
+ case DIT_BOOL_FUNCTION:
+ return (long)(f->*bool_fn)();
+ case DIT_STR_FUNCTION:
+ return !!*(f->*str_fn)(); // string -> int
+ case DIT_STRBUF_FUNCTION:
+ the_buf.clear(); // the_buf is handed to the function as a buffer argument, so it is emptied first
+ return !!*(f->*strbuf_2_fn)(the_buf, nullptr); // string -> 0/1
+
+ case DIT_UI8_EXT_FUNCTION: // external functions receive f as their argument
+ return (ui32)(*ui8_ext_fn)(f);
+ case DIT_UI16_EXT_FUNCTION:
+ return (ui32)(*ui16_ext_fn)(f);
+ case DIT_UI32_EXT_FUNCTION:
+ return (ui32)(*ui32_ext_fn)(f);
+ case DIT_UI64_EXT_FUNCTION:
+ return (long)(*ui64_ext_fn)(f); // TODO: 64 bit support in calculator?
+
+ case DIT_UI8_ENUM_EQ: // *_ENUM_EQ: 1 when the field equals enum_val, otherwise 0
+ return (ui32)(*(ui8*)(d + field_offset) == enum_val);
+ case DIT_UI8_ENUM_SET: // *_ENUM_SET: true when the field shares any bit with enum_val
+ return !!(ui32)(*(ui8*)(d + field_offset) & enum_val);
+
+ case DIT_UI16_ENUM_EQ:
+ return (ui32)(*(ui16*)(d + field_offset) == enum_val);
+ case DIT_UI16_ENUM_SET:
+ return !!(ui32)(*(ui16*)(d + field_offset) & enum_val);
+
+ case DIT_UI32_ENUM_EQ:
+ return (ui32)(*(ui32*)(d + field_offset) == enum_val);
+ case DIT_UI32_ENUM_SET:
+ return !!(ui32)(*(ui32*)(d + field_offset) & enum_val);
+
+ case DIT_INT_ENUM_FUNCTION_EQ:
+ return (ui32)((ui32)(f->*int_enum_fn)() == enum_val);
+ case DIT_INT_ENUM_FUNCTION_SET:
+ return !!(ui32)((ui32)(f->*int_enum_fn)() & enum_val);
+
+ case DIT_BOOL_FUNC_FIXED_STR: // *_FUNC_FIXED_STR items call a member function with the_buf as its argument
+ return (ui32)(f->*bool_strbuf_fn)(the_buf);
+ case DIT_UI8_FUNC_FIXED_STR:
+ return (ui32)(f->*ui8_strbuf_fn)(the_buf);
+ case DIT_UI16_FUNC_FIXED_STR:
+ return (ui32)(f->*ui16_strbuf_fn)(the_buf);
+ case DIT_UI32_FUNC_FIXED_STR:
+ return (ui32)(f->*ui32_strbuf_fn)(the_buf);
+ case DIT_I64_FUNC_FIXED_STR:
+ return (long)(f->*i64_strbuf_fn)(the_buf); // narrowed to long like the other 64-bit sources above
+ case DIT_UI64_FUNC_FIXED_STR:
+ return (long)(f->*ui64_strbuf_fn)(the_buf); // narrowed to long like the other 64-bit sources above
+ case DIT_FLOAT_FUNC_FIXED_STR:
+ return (float)(f->*float_strbuf_fn)(the_buf);
+ case DIT_DOUBLE_FUNC_FIXED_STR:
+ return (double)(f->*double_strbuf_fn)(the_buf);
+
+ case DIT_RESOLVE_BY_NAME:
+ return !!(f->*resolve_fn)(the_buf); // double negation of whatever resolve_fn returned
+
+ default:
+ assert(false);
+ break;
+ }
+
+ // unreached
+ return eval_res_type(false);
+}
+
+// Moves field_offset forward by `arrind` elements of the field's own type.
+// Items that are not plain fields (and string fields) are left untouched.
+void dump_item::set_arrind(int arrind) {
+    size_t elem_size = 0; // stays 0 for item types that have no per-element offset
+    switch (type) {
+        case DIT_BOOL_FIELD:
+            elem_size = sizeof(bool);
+            break;
+        case DIT_UI8_FIELD:
+            elem_size = sizeof(ui8);
+            break;
+        case DIT_UI16_FIELD:
+            elem_size = sizeof(ui16);
+            break;
+        case DIT_UI32_FIELD:
+            elem_size = sizeof(ui32);
+            break;
+        case DIT_I64_FIELD:
+            elem_size = sizeof(i64);
+            break;
+        case DIT_UI64_FIELD:
+            elem_size = sizeof(ui64);
+            break;
+        case DIT_FLOAT_FIELD:
+            elem_size = sizeof(float);
+            break;
+        case DIT_DOUBLE_FIELD:
+            elem_size = sizeof(double);
+            break;
+        case DIT_TIME_T32_FIELD:
+            elem_size = sizeof(time_t32);
+            break;
+        case DIT_PF16UI32_FIELD:
+            elem_size = sizeof(pf16ui32);
+            break;
+        case DIT_PF16FLOAT_FIELD:
+            elem_size = sizeof(pf16float);
+            break;
+        case DIT_SF16FLOAT_FIELD:
+            elem_size = sizeof(sf16float);
+            break;
+        default:
+            break;
+    }
+
+    // Do not touch field_offset at all for unsupported item types.
+    if (elem_size == 0)
+        return;
+    field_offset += arrind * elem_size;
+}
+
+static str_spn FieldNameChars("a-zA-Z0-9_$", true); // characters of a field name or number token; get_field() uses cbrk() on it to find where the token ends
+static str_spn MathOpChars("-+=*%/&|<>()!~^?:#", true); // operator and bracket characters; Compile() treats an argument containing any of them as an expression
+static str_spn SpaceChars("\t\n\r ", true); // whitespace; get_field() uses cbrk() on it to skip blanks after a token
+
+// Starts with zero compiled printouts and conditions, so that Cond(),
+// CondById(), Print(), CalcAll() and SelfTest() read well-defined counters
+// even when they are called before Compile().
+TFieldCalculatorBase::TFieldCalculatorBase()
+    : out_el(0)
+    , out_cond(0)
+{
+}
+
+TFieldCalculatorBase::~TFieldCalculatorBase() = default; // defined in this file, after field_calc_int.h is included (the class header only forward-declares dump_item and calc_op)
+
+// Looks `name` up (case-insensitively) in every registered item table.
+// On success copies the item into `it`, records the index of the table it
+// came from in it.pack_id and returns true; returns false when nothing matches.
+bool TFieldCalculatorBase::item_by_name(dump_item& it, const char* name) const {
+    for (size_t pack = 0; pack < named_dump_items.size(); ++pack) {
+        const named_dump_item* const table = named_dump_items[pack].first;
+        const size_t table_size = named_dump_items[pack].second;
+        for (unsigned int idx = 0; idx < table_size; ++idx) {
+            const named_dump_item& candidate = table[idx];
+            if (stricmp(name, candidate.name) != 0)
+                continue;
+            it = candidate.item;
+            it.pack_id = pack;
+            return true;
+        }
+    }
+    return false;
+}
+
+// Resolves `var_name` as a local variable.
+// Known variable: copies its item into `dst` and returns true.
+// Unknown variable: turns `dst` into a DIT_LOCAL_VARIABLE placeholder whose
+// name is copied into the string pool, and returns false.
+bool TFieldCalculatorBase::get_local_var(dump_item& dst, char* var_name) {
+    const TMap<const char*, dump_item>::const_iterator known = local_vars.find(var_name);
+    if (known != local_vars.end()) {
+        dst = known->second;
+        return true;
+    }
+
+    // New local variable
+    dst.type = DIT_LOCAL_VARIABLE;
+    dst.local_var_name = pool.append(var_name);
+    return false;
+}
+
+char* TFieldCalculatorBase::get_field(dump_item& dst, char* s) { // Parses one operand starting at s (the keyword name, a quoted string, a number, a named field with optional [index], or a local variable) into dst; returns the position where parsing should continue
+ if (!stricmp(s, "name")) { // matches only when the whole remaining text is the word name, in any letter case
+ dst.type = DIT_NAME;
+ return s + 4; // points at the terminating NUL
+ }
+
+ if (*s == '"' || *s == '\'') { // string constant; the opening quote character also closes it
+ char* end = strchr(s + 1, *s);
+ bool hasEsc = false;
+ while (end && end > s + 1 && end[-1] == '\\') { // a quote preceded by a backslash is escaped: keep searching
+ end = strchr(end + 1, *s);
+ hasEsc = true;
+ }
+ if (!end)
+ ythrow yexception() << "calc-expr: unterminated string constant at " << s;
+ dst.type = DIT_STR_CONST;
+ dst.the_buf.assign(s + 1, end); // text between the quotes
+ if (hasEsc)
+ SubstGlobal(dst.the_buf, *s == '"' ? "\\\"" : "\\'", *s == '"' ? "\"" : "'"); // replace each backslash-quote pair with the bare quote
+ dst.set_arrind(0); // just in case
+ return end + 1;
+ }
+
+ bool is_number = isdigit((ui8)*s) || (*s == '+' || *s == '-') && isdigit((ui8)s[1]), is_float = false; // a digit, or a sign directly followed by a digit
+ char* end = FieldNameChars.cbrk(s + is_number); // end of the token; for numbers the first character (possibly a sign) is skipped before scanning
+ if (is_number && *end == '.') { // fractional part follows
+ is_float = true;
+ end = FieldNameChars.cbrk(end + 1);
+ }
+ char* next = SpaceChars.cbrk(end); // skip whitespace after the token
+ int arr_index = 0;
+ bool has_arr_index = false;
+ if (*next == '[') { // optional array subscript
+ arr_index = atoi(next + 1);
+ has_arr_index = true;
+ next = strchr(next, ']');
+ if (!next)
+ ythrow yexception() << "calc-expr: No closing ']' for '" << s << "'";
+ next = SpaceChars.cbrk(next + 1);
+ }
+ char end_sav = *end; // the token is NUL-terminated in place for the lookups below and restored afterwards
+ *end = 0;
+
+ if (!item_by_name(dst, s)) { // not a registered item: number literal or local variable
+ if (!is_number) {
+ get_local_var(dst, s);
+ } else if (is_float) {
+ dst = (float)strtod(s, nullptr);
+ } else
+ dst = strtol(s, nullptr, 10);
+
+ dst.pack_id = 0;
+ *end = end_sav;
+ return next;
+ }
+
+ // check array/not array
+ if (has_arr_index && !dst.is_array_field()) // NOTE(review): the throws below leave the caller's buffer with the patched-in NUL (end_sav is not restored)
+ ythrow yexception() << "calc-expr: field " << s << " is not an array";
+
+ //if (!has_arr_index && dst.is_array_field())
+ // yexception("calc-expr: field %s is array, index required", s);
+
+ if (has_arr_index && (arr_index < 0 || arr_index >= dst.arr_length))
+ ythrow yexception() << "calc-expr: array index [" << arr_index << "] is out of range for field " << s << " (length is " << dst.arr_length << ")";
+
+ *end = end_sav;
+ dst.set_arrind(arr_index); // point the item at the requested element (index 0 when no subscript was given)
+ return next;
+}
+
+// BEGIN Stack calculator functions
+// Skips whitespace/control characters and round brackets starting at `c`.
+// Every '(' increments and every ')' decrements `bracket_depth`.
+// Returns the first character that is neither, or the terminating NUL.
+inline char* skipspace(char* c, int& bracket_depth) {
+    for (;; ++c) {
+        const char ch = *c;
+        if (ch == '(') {
+            ++bracket_depth;
+        } else if (ch == ')') {
+            --bracket_depth;
+        } else if (ch == 0 || (ui8)ch > ' ') {
+            // end of input or the first significant character
+            return c;
+        }
+    }
+}
+
+// Throws when `item` is still a DIT_LOCAL_VARIABLE placeholder, i.e. a name
+// that matched neither a registered item nor an assigned local variable.
+void ensure_defined(const dump_item& item) {
+    if (item.type != DIT_LOCAL_VARIABLE)
+        return;
+    ythrow yexception() << "Usage of non-defined field or local variable '" << item.local_var_name << "'";
+}
+
+// Folds the two topmost stack slots into one: `left <right.oper> right` -> left.
+//
+// Assignment: registers right.item as the value of the local variable named by
+// left.item (one assignment per variable) and replaces the left operand with it.
+// Any other operator: appends a calc_op to `ops` and turns the left operand
+// into a DIT_MATH_RESULT that refers to the new op by index.
+//
+// In both cases only left.item is replaced. left.oper and left.op_prio describe
+// how the left slot is linked to the slot below it and must survive the fold;
+// overwriting them made expressions such as `2 + (a = 1)` fail with a bogus
+// "Assignment only to local variables is allowed".
+void TFieldCalculatorBase::emit_op(TVector<calc_op>& ops, calc_elem& left, calc_elem& right) {
+    const char oper = right.oper;
+    ensure_defined(right.item);
+    if (oper == OP_ASSIGN) {
+        if (left.item.type != DIT_LOCAL_VARIABLE) {
+            ythrow yexception() << "Assignment only to local variables is allowed";
+        }
+        if (local_vars.find(left.item.local_var_name) != local_vars.end()) {
+            ythrow yexception() << "Reassignment to the local variable " << left.item.local_var_name << " is not allowed";
+        }
+        local_vars[left.item.local_var_name] = right.item;
+        if (right.item.type == DIT_MATH_RESULT) {
+            // arr_ind was produced from ops.size() below, so index the same vector
+            ops[right.item.arr_ind].is_variable = true;
+        }
+        left.item = right.item;
+    } else {
+        ensure_defined(left.item);
+        const int out_op = static_cast<int>(ops.size());
+        ops.push_back(calc_op(left, right));
+        left.item.type = DIT_MATH_RESULT;
+        left.item.arr_ind = out_op;
+    }
+}
+
+// Returns the binding priority of an Operators code: a higher value binds
+// tighter. Codes that are not operators (including OP_UNKNOWN) give 0.
+inline int get_op_prio(char c) {
+    int prio = 0;
+    switch (c) {
+        case OP_UNARY_NOT:
+        case OP_UNARY_COMPLEMENT:
+        case OP_UNARY_MINUS:
+        case OP_LOG:
+        case OP_LOG10:
+        case OP_ROUND:
+            prio = 14;
+            break;
+        case OP_REGEXP:
+        case OP_REGEXP_NOT:
+            prio = 13;
+            break;
+        case OP_MULTIPLY:
+        case OP_DIVIDE:
+        case OP_MODULUS:
+            prio = 12;
+            break;
+        case OP_ADD:
+        case OP_SUBSTRACT:
+            prio = 11;
+            break;
+        case OP_LEFT_SHIFT:
+        case OP_RIGHT_SHIFT:
+            prio = 10;
+            break;
+        case OP_LESS:
+        case OP_LESS_OR_EQUAL:
+        case OP_GREATER:
+        case OP_GREATER_OR_EQUAL:
+            prio = 9;
+            break;
+        case OP_EQUAL:
+        case OP_NOT_EQUAL:
+            prio = 8;
+            break;
+        case OP_BITWISE_AND:
+            prio = 7;
+            break;
+        case OP_XOR:
+            prio = 6;
+            break;
+        case OP_BITWISE_OR:
+            prio = 5;
+            break;
+        case OP_LOGICAL_AND:
+            prio = 4;
+            break;
+        case OP_LOGICAL_OR:
+            prio = 3;
+            break;
+        case OP_QUESTION:
+        case OP_COLON:
+            prio = 2;
+            break;
+        case OP_ASSIGN:
+            prio = 1;
+            break;
+        default:
+            break;
+    }
+    return prio;
+}
+
+// Reads one operator at `c` and advances `c` past it.
+// `unary_op_near` decides whether '-' means unary minus or subtraction.
+// Returns OP_UNKNOWN (with one character consumed) when nothing matches.
+Operators get_oper(char*& c, bool unary_op_near) {
+    const char lead = *c++;
+
+    // Consumes the next character when it equals `expected`.
+    const auto accept = [&c](char expected) -> bool {
+        if (*c != expected)
+            return false;
+        ++c;
+        return true;
+    };
+    // Consumes `len` characters when the input continues with `word`.
+    const auto accept_word = [&c](const char* word, size_t len) -> bool {
+        if (strncmp(c, word, len) != 0)
+            return false;
+        c += len;
+        return true;
+    };
+
+    switch (lead) {
+        case '&':
+            return accept('&') ? OP_LOGICAL_AND : OP_BITWISE_AND;
+        case '|':
+            return accept('|') ? OP_LOGICAL_OR : OP_BITWISE_OR;
+        case '<':
+            if (accept('='))
+                return OP_LESS_OR_EQUAL;
+            if (accept('<'))
+                return OP_LEFT_SHIFT;
+            return OP_LESS;
+        case '>':
+            if (accept('='))
+                return OP_GREATER_OR_EQUAL;
+            if (accept('>'))
+                return OP_RIGHT_SHIFT;
+            return OP_GREATER;
+        case '!':
+            if (accept('='))
+                return OP_NOT_EQUAL;
+            if (accept('~'))
+                return OP_REGEXP_NOT;
+            return OP_UNARY_NOT;
+        case '=':
+            if (accept('='))
+                return OP_EQUAL;
+            if (accept('~'))
+                return OP_REGEXP;
+            return OP_ASSIGN;
+        case '-':
+            return unary_op_near ? OP_UNARY_MINUS : OP_SUBSTRACT;
+        case '#':
+            // named unary functions are written as #NAME#
+            if (accept_word("LOG#", 4))
+                return OP_LOG;
+            if (accept_word("LOG10#", 6))
+                return OP_LOG10;
+            if (accept_word("ROUND#", 6))
+                return OP_ROUND;
+            return OP_UNKNOWN;
+        case '+':
+            return OP_ADD;
+        case '*':
+            return OP_MULTIPLY;
+        case '/':
+            return OP_DIVIDE;
+        case '%':
+            return OP_MODULUS;
+        case '^':
+            return OP_XOR;
+        case '~':
+            return OP_UNARY_COMPLEMENT;
+        case '?':
+            return OP_QUESTION;
+        case ':':
+            return OP_COLON;
+    }
+    return OP_UNKNOWN;
+}
+// END Stack calculator functions
+
+void TFieldCalculatorBase::Compile(char** field_names, int field_count) { // Parses field_count argument strings into printouts (values to output) and conditions (arguments starting with `?`), emitting calc_ops for expressions
+ out_el = 0, out_cond = 0;
+ autoarray<dump_item>(field_count).swap(printouts); // at most one printout or condition per argument
+ autoarray<dump_item>(field_count).swap(conditions);
+ local_vars.clear(); // NOTE(review): calc_ops is not cleared here, so ops emitted by an earlier Compile() call appear to stay in the vector; confirm this is intended
+
+ // parse arguments into calculator's "pseudo-code"
+ for (int el = 0; el < field_count; el++) {
+ char* c = field_names[el];
+ bool is_expr = !!*MathOpChars.brk(c), is_cond = *c == '?'; // expression: contains any operator or bracket character; condition: starts with `?`
+ if (is_cond)
+ c++;
+ if (!is_expr && !is_cond) { // a single plain operand: no calculator needed
+ get_field(printouts[out_el], c);
+ ensure_defined(printouts[out_el]);
+ ++out_el;
+ continue;
+ } else { // Stack Calculator
+ const int maxstack = 64;
+ calc_elem fstack[maxstack]; // calculator's stack
+ int bdepth = 0; // brackets depth
+ int stack_cur = -1; // index of the top slot, -1 while the stack is empty
+ bool unary_op_near = false; // indicates that the next operator is unary
+ bool had_assignment_out_of_brackets = false;
+ int uop_seq = 0; // maintains right-to-left order for unary operators
+ while (*(c = skipspace(c, bdepth))) {
+ /** https://wiki.yandex.ru/JandeksPoisk/Antispam/OwnersData/attselect#calc */
+ //printf("1.%i c = '%s'\n", unary_op_near, c);
+ Operators cur_oper = OP_UNKNOWN;
+ int op_prio = 0;
+ if (stack_cur >= 0) { // every operand except the first is preceded by an operator
+ cur_oper = get_oper(c, unary_op_near);
+ op_prio = get_op_prio(cur_oper);
+ if (!op_prio)
+ ythrow yexception() << "calc-expr: Unsupported operator '" << c[-1] << "'";
+ op_prio += bdepth * 256 + uop_seq; // each bracket level outranks any operator priority of the level outside it
+ if (unary_op_near)
+ uop_seq += 20; // each further unary operator in a row gets a higher priority than the previous one
+ while (op_prio <= fstack[stack_cur].op_prio && stack_cur > 0) { // fold everything on the stack that binds at least as tightly as the new operator
+ emit_op(calc_ops, fstack[stack_cur - 1], fstack[stack_cur]);
+ stack_cur--;
+ }
+ }
+ //printf("2.%i c = '%s'\n", unary_op_near, c);
+ had_assignment_out_of_brackets |= (bdepth == 0 && cur_oper == OP_ASSIGN);
+ c = skipspace(c, bdepth);
+ unary_op_near = *c == '-' && !isdigit((ui8)c[1]) || *c == '~' || (*c == '!' && c[1] != '=') ||
+ !strncmp(c, "#LOG#", 5) || !strncmp(c, "#LOG10#", 7) || !strncmp(c, "#ROUND#", 7); // what follows is a unary operator rather than an operand
+ if (!unary_op_near)
+ uop_seq = 0;
+ if (stack_cur >= maxstack - 1)
+ ythrow yexception() << "calc-expr: Math eval stack overflow!\n";
+ stack_cur++;
+ fstack[stack_cur].oper = cur_oper;
+ fstack[stack_cur].op_prio = op_prio;
+ //printf("3.%i c = '%s'\n", unary_op_near, c);
+ if (unary_op_near)
+ fstack[stack_cur].item = dump_item(); // default-constructed item stands in as the left side of the upcoming unary operator; c is not advanced
+ else
+ c = get_field(fstack[stack_cur].item, c);
+ }
+ while (stack_cur > 0) { // end of input: fold whatever is left on the stack
+ emit_op(calc_ops, fstack[stack_cur - 1], fstack[stack_cur]);
+ stack_cur--;
+ }
+ ensure_defined(fstack[0].item);
+ if (is_cond) {
+ if (had_assignment_out_of_brackets)
+ ythrow yexception() << "Assignment in condition. (Did you mean '==' instead of '='?)";
+ if (fstack[0].item.type != DIT_FAKE_ITEM) // Skip empty conditions: "?()".
+ conditions[out_cond++] = fstack[0].item;
+ } else if (!had_assignment_out_of_brackets) { // an argument with a top-level assignment only defines a variable and is not added to printouts
+ printouts[out_el++] = fstack[0].item;
+ }
+ }
+ }
+ // calc_ops will not grow any more, so arr_ind -> op
+ for (int n = 0; n < out_cond; n++)
+ conditions[n].rewrite_op(calc_ops.data());
+ for (int n = 0; n < out_el; n++)
+ printouts[n].rewrite_op(calc_ops.data());
+ for (auto& local_var : local_vars) {
+ local_var.second.rewrite_op(calc_ops.data());
+ }
+ for (int n = 0; n < (int)calc_ops.size(); n++) {
+ calc_ops[n].Left.rewrite_op(calc_ops.data());
+ calc_ops[n].Right.rewrite_op(calc_ops.data());
+ }
+}
+
+// For a DIT_MATH_RESULT item, replaces the index of its operation (arr_ind)
+// with a direct pointer into `ops`. Other items are left unchanged.
+void dump_item::rewrite_op(const calc_op* ops) {
+    if (type != DIT_MATH_RESULT)
+        return;
+    op = ops + arr_ind;
+}
+
+// Drops the cached values of all expression-backed local variables, so that
+// their next evaluation recomputes them.
+void TFieldCalculatorBase::MarkLocalVarsAsUncalculated() {
+    for (auto it = local_vars.begin(); it != local_vars.end(); ++it) {
+        dump_item& value = it->second;
+        if (value.type != DIT_MATH_RESULT)
+            continue;
+        value.op->calculated = false;
+    }
+}
+
+// Evaluates all compiled conditions against the records in `d`.
+// Returns true only when every condition is non-zero (true when there are none).
+// Cached local variables are reset first.
+bool TFieldCalculatorBase::Cond(const char** d) {
+    MarkLocalVarsAsUncalculated();
+    for (int n = 0; n < out_cond; ++n) {
+        /** https://wiki.yandex.ru/JandeksPoisk/Antispam/OwnersData/attselect#conditions */
+        const eval_res_type res = conditions[n].eval(d);
+        bool is_true;
+        if (res.type == 0) {
+            is_true = !!res.res_ui32;
+        } else if (res.type == 1) {
+            is_true = !!res.res_long;
+        } else {
+            is_true = !!res.res_dbl;
+        }
+        if (!is_true)
+            return false;
+    }
+    return true;
+}
+
+// Evaluates a single compiled condition against the records in `d`.
+// Returns false when `condNumber` is not a valid condition index (negative or
+// not less than the number of compiled conditions) or the condition is zero.
+// Cached local variables are reset first.
+bool TFieldCalculatorBase::CondById(const char** d, int condNumber) {
+    MarkLocalVarsAsUncalculated();
+    // A negative index would read before the start of `conditions`.
+    if (condNumber < 0 || condNumber >= out_cond)
+        return false;
+    const eval_res_type res = conditions[condNumber].eval(d);
+    if (res.type == 0)
+        return !!res.res_ui32;
+    if (res.type == 1)
+        return !!res.res_long;
+    return !!res.res_dbl;
+}
+
+// Writes one line to `p`: all printouts for the records in `d`, separated by
+// tabs and terminated by a newline. A DIT_NAME printout is replaced by `Name`,
+// expressions are evaluated, everything else is printed by dump_item::print().
+void TFieldCalculatorBase::Print(FILE* p, const char** d, const char* Name) {
+    for (int n = 0; n < out_el; ++n) {
+        const dump_item& item = printouts[n];
+        if (item.type == DIT_MATH_RESULT) { // calculate
+            const eval_res_type res = item.eval(d);
+            if (res.type == 0) {
+                fprintf(p, "%u", res.res_ui32);
+            } else if (res.type == 1) {
+                fprintf(p, "%ld", res.res_long);
+            } else if (res.type == 2) {
+                fprintf(p, "%f", res.res_dbl);
+            }
+        } else if (item.type == DIT_NAME) {
+            fprintf(p, "%s", Name);
+        } else {
+            item.print(p, d);
+        }
+        const bool is_last = (n == out_el - 1);
+        fprintf(p, is_last ? "\n" : "\t");
+    }
+}
+
+// Evaluates every printout that is an expression or a float field and stores
+// the values, in order, in `result` (which is cleared first).
+//
+// The evaluation result is converted according to its type tag, the same way
+// Cond(), Print() and SelfTest() interpret it; reading res_dbl unconditionally
+// returned a meaningless number for integer-valued expressions.
+// NOTE(review): unlike Cond(), this does not reset cached local variables;
+// confirm callers evaluate conditions for the same record first.
+void TFieldCalculatorBase::CalcAll(const char** d, TVector<float>& result) const {
+    result.clear();
+    for (int n = 0; n < out_el; ++n) {
+        const dump_item& item = printouts[n];
+        if (item.type != DIT_MATH_RESULT && item.type != DIT_FLOAT_FIELD)
+            continue;
+        const eval_res_type res = item.eval(d);
+        switch (res.type) {
+            case 0:
+                result.push_back(static_cast<float>(res.res_ui32));
+                break;
+            case 1:
+                result.push_back(static_cast<float>(res.res_long));
+                break;
+            default:
+                result.push_back(static_cast<float>(res.res_dbl));
+                break;
+        }
+    }
+}
+
+// Evaluates the first printout against an empty dummy record and prints the
+// value to stdout. Throws when nothing has been compiled into printouts.
+void TFieldCalculatorBase::SelfTest() {
+    if (out_el < 1)
+        ythrow yexception() << "Please specify conditions for test mode";
+
+    const char* no_record = "";
+    const eval_res_type value = printouts[0].eval(&no_record);
+    if (value.type == 0) {
+        printf("%u\n", value.res_ui32);
+    } else if (value.type == 1) {
+        printf("%ld\n", value.res_long);
+    } else if (value.type == 2) {
+        printf("%f\n", value.res_dbl);
+    }
+}
+
+// Prints to stdout every field (and every array element) of the first item
+// table whose value differs between rec1 and rec2, as "\tname: old -> new".
+// To address array elements the shared item's field_offset is shifted
+// temporarily and restored after each element.
+void TFieldCalculatorBase::PrintDiff(const char* rec1, const char* rec2) {
+    for (size_t n = 0; n < named_dump_items[0].second; ++n) {
+        const named_dump_item& entry = named_dump_items[0].first[n];
+        const dump_item& field = entry.item;
+        if (!field.is_field())
+            continue; // not really a field
+
+        dump_item& writable = const_cast<dump_item&>(field);
+        const int count = field.is_array_field() ? field.arr_length : 1;
+        for (int ind = 0; ind < count; ++ind) {
+            const intptr_t saved_offset = field.field_offset;
+            writable.set_arrind(ind);
+            if (!(field.eval(&rec1) == field.eval(&rec2))) {
+                if (field.is_array_field())
+                    printf("\t%s[%i]: ", entry.name, ind);
+                else
+                    printf("\t%s: ", entry.name);
+                field.print(stdout, &rec1);
+                printf(" -> ");
+                field.print(stdout, &rec2);
+            }
+            writable.field_offset = saved_offset;
+        }
+    }
+}
+
+// Writes "name=value" (or "name[i]=value" for array elements) for every field
+// of every item table to `s`, separated by `delim`. Table k is printed
+// against record d[k]. To address array elements the shared item's
+// field_offset is shifted temporarily and restored after each element.
+void TFieldCalculatorBase::DumpAll(IOutputStream& s, const char** d, const TStringBuf& delim) {
+    bool need_delim = false;
+    for (size_t pack = 0; pack < named_dump_items.size(); ++pack) {
+        const named_dump_item* const table = named_dump_items[pack].first;
+        const size_t table_size = named_dump_items[pack].second;
+        const char* record = d[pack];
+        for (size_t n = 0; n < table_size; ++n) {
+            const named_dump_item& entry = table[n];
+            const dump_item& field = entry.item;
+            if (!field.is_field())
+                continue;
+
+            dump_item& writable = const_cast<dump_item&>(field);
+            const int count = field.is_array_field() ? field.arr_length : 1;
+            for (int ind = 0; ind < count; ++ind) {
+                if (need_delim)
+                    s << delim;
+                need_delim = true;
+
+                s << entry.name;
+                if (field.is_array_field())
+                    Printf(s, "[%i]", ind);
+                s << "=";
+
+                const intptr_t saved_offset = field.field_offset;
+                writable.set_arrind(ind);
+                field.print(&s, &record);
+                writable.field_offset = saved_offset;
+            }
+        }
+    }
+}
diff --git a/library/cpp/fieldcalc/field_calc.h b/library/cpp/fieldcalc/field_calc.h
new file mode 100644
index 0000000000..46bf371a60
--- /dev/null
+++ b/library/cpp/fieldcalc/field_calc.h
@@ -0,0 +1,136 @@
+#pragma once
+
+#include <cstdio>
+
+#include <library/cpp/deprecated/autoarray/autoarray.h>
+#include <util/generic/map.h>
+#include <util/generic/vector.h>
+#include <util/memory/segmented_string_pool.h>
+
+// These types are only forward-declared in this header.
+struct dump_item;
+struct calc_op;
+struct named_dump_item;
+struct calc_elem;
+class IOutputStream;
+
+// Returns the field description table of record type T as a
+// (pointer to the first named_dump_item, number of items) pair.
+// Only declared here; no definition is visible in this header.
+template <class T>
+std::pair<const named_dump_item*, size_t> get_named_dump_items();
+
+// Type-erased base of the field calculators. Records are passed as an array
+// of raw `const char*` pointers, one per entry of named_dump_items.
+// Constructor, destructor and all operations are protected, so the class is
+// only usable through derived classes such as TFieldCalculator<T>.
+class TFieldCalculatorBase {
+private:
+    segmented_string_pool pool;
+    void emit_op(TVector<calc_op>& ops, calc_elem& left, calc_elem& right);
+    void MarkLocalVarsAsUncalculated();
+
+protected:
+    autoarray<dump_item> printouts, conditions;
+    // TFieldCalculator::Compile() returns out_el as the number of fields printed;
+    // out_cond is presumably the matching count for conditions - TODO confirm.
+    int out_el, out_cond;
+    TVector<calc_op> calc_ops; // operands for calculator, indexed by arr_ind for DIT_math_result
+
+    // One (field table, table size) pair per record type handled by the calculator.
+    TVector<std::pair<const named_dump_item*, size_t>> named_dump_items;
+    TMap<const char*, dump_item> local_vars;
+
+    char* get_field(dump_item& dst, char* s);
+    bool get_local_var(dump_item& dst, char* s);
+    virtual bool item_by_name(dump_item& it, const char* name) const;
+
+    TFieldCalculatorBase();
+    virtual ~TFieldCalculatorBase();
+
+    // In the methods below `d` points to an array with one record pointer per
+    // entry of named_dump_items.
+    bool Cond(const char** d);
+    bool CondById(const char** d, int condNumber);
+    void Print(FILE* p, const char** d, const char* Name);
+    void Compile(char** field_names, int field_count);
+    void SelfTest();
+    // Compares two records of the first registered record type only.
+    void PrintDiff(const char* d1, const char* d2);
+    void CalcAll(const char** d, TVector<float>& result) const;
+    void DumpAll(IOutputStream& s, const char** d, const TStringBuf& delim);
+};
+
+// Calculator bound to a single record type T. Each method passes the address
+// of the record to TFieldCalculatorBase as a raw byte pointer.
+template <class T>
+class TFieldCalculator: protected TFieldCalculatorBase {
+public:
+    // Registers the field table of T as the first (and only) record type.
+    TFieldCalculator() {
+        named_dump_items.push_back(get_named_dump_items<T>());
+    }
+
+    ~TFieldCalculator() override = default;
+
+    bool Cond(const T& d) {
+        const char* raw = reinterpret_cast<const char*>(&d);
+        return TFieldCalculatorBase::Cond(&raw);
+    }
+
+    bool CondById(const T& d, int condNumber) {
+        const char* raw = reinterpret_cast<const char*>(&d);
+        return TFieldCalculatorBase::CondById(&raw, condNumber);
+    }
+
+    // Same as Print(stdout, d, Name).
+    void Print(const T& d, const char* Name) {
+        const char* raw = reinterpret_cast<const char*>(&d);
+        TFieldCalculatorBase::Print(stdout, &raw, Name);
+    }
+
+    void Print(FILE* p, const T& d, const char* Name) {
+        const char* raw = reinterpret_cast<const char*>(&d);
+        TFieldCalculatorBase::Print(p, &raw, Name);
+    }
+
+    // Compiles the expressions and returns the number of fields printed.
+    size_t Compile(char** field_names, int field_count) {
+        TFieldCalculatorBase::Compile(field_names, field_count);
+        return static_cast<size_t>(out_el);
+    }
+
+    void SelfTest() {
+        TFieldCalculatorBase::SelfTest();
+    }
+
+    void PrintDiff(const T& d1, const T& d2) {
+        const char* before = reinterpret_cast<const char*>(&d1);
+        const char* after = reinterpret_cast<const char*>(&d2);
+        TFieldCalculatorBase::PrintDiff(before, after);
+    }
+
+    // Per the original note: it appends to `result', clear it yourself.
+    void CalcAll(const T& d, TVector<float>& result) const {
+        const char* raw = reinterpret_cast<const char*>(&d);
+        TFieldCalculatorBase::CalcAll(&raw, result);
+    }
+
+    void DumpAll(IOutputStream& s, const T& d, const TStringBuf& delim) {
+        const char* raw = reinterpret_cast<const char*>(&d);
+        TFieldCalculatorBase::DumpAll(s, &raw, delim);
+    }
+};
+
+// Calculator over a pair of records (T, T2). The field table of T is
+// registered by the base class; the table of T2 is appended after it, so
+// record pointers are always passed in the order {T, T2}.
+template <class T, class T2>
+class TFieldCalculator2: protected TFieldCalculator<T> {
+public:
+    TFieldCalculator2() {
+        TFieldCalculator<T>::named_dump_items.push_back(get_named_dump_items<T2>());
+    }
+
+    ~TFieldCalculator2() override = default;
+
+    bool Cond(const T& d, const T2& d2) {
+        const char* records[2] = {
+            reinterpret_cast<const char*>(&d),
+            reinterpret_cast<const char*>(&d2),
+        };
+        return TFieldCalculatorBase::Cond(records);
+    }
+
+    bool CondById(const T& d, const T2& d2, int condNumber) {
+        const char* records[2] = {
+            reinterpret_cast<const char*>(&d),
+            reinterpret_cast<const char*>(&d2),
+        };
+        return TFieldCalculatorBase::CondById(records, condNumber);
+    }
+
+    // Same as Print(stdout, d, d2, Name).
+    void Print(const T& d, const T2& d2, const char* Name) {
+        const char* records[2] = {
+            reinterpret_cast<const char*>(&d),
+            reinterpret_cast<const char*>(&d2),
+        };
+        TFieldCalculatorBase::Print(stdout, records, Name);
+    }
+
+    void Print(FILE* p, const T& d, const T2& d2, const char* Name) {
+        const char* records[2] = {
+            reinterpret_cast<const char*>(&d),
+            reinterpret_cast<const char*>(&d2),
+        };
+        TFieldCalculatorBase::Print(p, records, Name);
+    }
+
+    // Forwards to TFieldCalculator<T>::Compile and returns its result.
+    size_t Compile(char** field_names, int field_count) {
+        return TFieldCalculator<T>::Compile(field_names, field_count);
+    }
+};
diff --git a/library/cpp/fieldcalc/field_calc_int.h b/library/cpp/fieldcalc/field_calc_int.h
new file mode 100644
index 0000000000..5f71fafbda
--- /dev/null
+++ b/library/cpp/fieldcalc/field_calc_int.h
@@ -0,0 +1,593 @@
+#pragma once
+
+#include <cmath>
+
+#include <util/system/defaults.h>
+#include <util/system/yassert.h>
+#include <util/memory/alloc.h>
+#include <util/generic/yexception.h>
+
+#include "lossy_types.h"
+#include "field_calc.h"
+
+// eval_res_type
+// Tagged value produced by expression evaluation. `type` selects the active
+// union member: 0 - res_ui32, 1 - res_long, 2 - res_dbl, 3 - null (no value).
+struct eval_res_type {
+    union {
+        ui32 res_ui32;
+        long res_long;
+        double res_dbl;
+    };
+    int type;
+
+    eval_res_type(ui32 v) : res_ui32(v), type(0) {}
+    eval_res_type(long v) : res_long(v), type(1) {}
+    // booleans are stored as long
+    eval_res_type(bool v) : res_long(v), type(1) {}
+    eval_res_type(double v) : res_dbl(v), type(2) {}
+
+    // a special null value for ternary operator; the union is left unset
+    explicit eval_res_type() : type(3) {}
+
+    operator ui32() const;
+    operator long() const;
+    operator double() const;
+
+    void to_long();
+    bool is_null() const;
+};
+
+// True for the special null value created by the default constructor (type 3).
+inline bool eval_res_type::is_null() const {
+    const bool isNullTag = (type == 3);
+    return isNullTag;
+}
+
+// Converts the stored value to long in place: ui32 is widened, double is
+// truncated by a cast. The tag becomes 1 whatever it was before.
+inline void eval_res_type::to_long() {
+    switch (type) {
+        case 0:
+            res_long = res_ui32;
+            break;
+        case 2:
+            res_long = (long)res_dbl;
+            break;
+        default:
+            break;
+    }
+    type = 1;
+}
+
+// Returns the ui32 member; asserts that the value is tagged as ui32 (type 0).
+inline eval_res_type::operator ui32() const {
+    const bool holdsUi32 = (type == 0);
+    assert(holdsUi32);
+    return res_ui32;
+}
+
+// Returns the value as long; asserts that it is one of the integer kinds
+// (type 0 or 1).
+inline eval_res_type::operator long() const {
+    assert(type == 0 || type == 1);
+    if (type == 1)
+        return res_long;
+    return res_ui32;
+}
+
+// Returns the value as double. Any tag other than 1 or 2 is read as ui32.
+inline eval_res_type::operator double() const {
+    switch (type) {
+        case 2:
+            return res_dbl;
+        case 1:
+            return (double)res_long;
+        default:
+            return (double)res_ui32;
+    }
+}
+
+// Adds two values in the wider of the two operand kinds
+// (ui32 < long < double, by tag value).
+inline eval_res_type operator+(const eval_res_type& a, const eval_res_type& b) {
+    const int widest = std::max(a.type, b.type);
+    if (widest == 0)
+        return (ui32)a + (ui32)b;
+    if (widest == 1)
+        return (long)a + (long)b;
+    return (double)a + (double)b;
+}
+
+// Subtracts b from a. Two ui32 operands are subtracted as long, so the
+// result can be negative; otherwise the wider operand kind is used.
+inline eval_res_type operator-(const eval_res_type& a, const eval_res_type& b) {
+    const int widest = std::max(a.type, b.type);
+    if (widest == 0 || widest == 1)
+        return (long)a - (long)b;
+    return (double)a - (double)b;
+}
+
+// Unary minus. A ui32 value is converted to long before negation.
+inline eval_res_type Minus(const eval_res_type& a) {
+    if (a.type == 0)
+        return -(long)a.res_ui32;
+    if (a.type == 1)
+        return -a.res_long;
+    return -a.res_dbl;
+}
+
+// Natural logarithm of the stored value, computed from whichever union
+// member the tag selects.
+inline eval_res_type Log(const eval_res_type& a) {
+    if (a.type == 0)
+        return log(a.res_ui32);
+    if (a.type == 1)
+        return log(a.res_long);
+    return log(a.res_dbl);
+}
+
+// Base-10 logarithm of the stored value, computed from whichever union
+// member the tag selects.
+inline eval_res_type Log10(const eval_res_type& a) {
+    if (a.type == 0)
+        return log10(a.res_ui32);
+    if (a.type == 1)
+        return log10(a.res_long);
+    return log10(a.res_dbl);
+}
+
+// Rounds a double with round(); integer values are returned unchanged and
+// keep their kind.
+inline eval_res_type Round(const eval_res_type& a) {
+    if (a.type == 0)
+        return a.res_ui32;
+    if (a.type == 1)
+        return a.res_long;
+    return round(a.res_dbl);
+}
+
+// Compares two values for equality in the wider of the two operand kinds.
+inline bool operator==(const eval_res_type& a, const eval_res_type& b) {
+    const int widest = std::max(a.type, b.type);
+    if (widest == 0)
+        return (ui32)a == (ui32)b;
+    if (widest == 1)
+        return (long)a == (long)b;
+    return (double)a == (double)b;
+}
+
+// Less-than comparison in the wider of the two operand kinds.
+inline bool operator<(const eval_res_type& a, const eval_res_type& b) {
+    const int widest = std::max(a.type, b.type);
+    if (widest == 0)
+        return (ui32)a < (ui32)b;
+    if (widest == 1)
+        return (long)a < (long)b;
+    return (double)a < (double)b;
+}
+
+// Multiplies two values in the wider of the two operand kinds.
+inline eval_res_type operator*(const eval_res_type& a, const eval_res_type& b) {
+    const int widest = std::max(a.type, b.type);
+    if (widest == 0)
+        return (ui32)a * (ui32)b;
+    if (widest == 1)
+        return (long)a * (long)b;
+    return (double)a * (double)b;
+}
+
+// Divides a by b as doubles. 0 / 0 yields 0; any other division by zero
+// throws yexception("Division by zero").
+inline double operator/(const eval_res_type& a, const eval_res_type& b) {
+    const double num = a;
+    const double den = b;
+    if (den == 0) {
+        if (num != 0)
+            ythrow yexception() << "Division by zero"; // TODO: show parameter names
+        return 0.; // assume that a should be 0
+    }
+    return num / den;
+}
+
+// dump_item
+// Kind of a dump_item. The order of enumerators matters: dump_item::is_field()
+// tests for the open range (DIT_FIELDS_START, DIT_FIELDS_END) and
+// dump_item::is_accessor_func() for the closed range
+// [DIT_INT_FUNCTION, DIT_UI64_EXT_FUNCTION], so new values must be inserted
+// with these ranges in mind.
+enum EDumpItemType {
+    DIT_FAKE_ITEM,   // fake item - value never used
+    DIT_MATH_RESULT, // eval result
+    DIT_NAME,
+
+    DIT_FIELDS_START, // Start of item types for real fields
+
+    DIT_BOOL_FIELD,
+    DIT_UI8_FIELD,
+    DIT_UI16_FIELD,
+    DIT_UI32_FIELD,
+    DIT_I64_FIELD,
+    DIT_UI64_FIELD,
+    DIT_FLOAT_FIELD,
+    DIT_DOUBLE_FIELD,
+    DIT_TIME_T32_FIELD,
+    DIT_PF16UI32_FIELD,
+    DIT_PF16FLOAT_FIELD,
+    DIT_SF16FLOAT_FIELD,
+    DIT_STRING_FIELD, // new
+
+    DIT_FIELDS_END, // End of item types for real fields
+
+    DIT_LONG_CONST,
+    DIT_FLOAT_CONST,
+    DIT_STR_CONST,
+
+    // Member functions of the record without arguments
+    DIT_INT_FUNCTION,
+    DIT_FLOAT_FUNCTION,
+    DIT_BOOL_FUNCTION,
+    DIT_STR_FUNCTION,    // new
+    DIT_STRBUF_FUNCTION, // new
+
+    // Free functions that take the record pointer
+    DIT_UI8_EXT_FUNCTION,
+    DIT_UI16_EXT_FUNCTION,
+    DIT_UI32_EXT_FUNCTION,
+    DIT_UI64_EXT_FUNCTION,
+
+    // Field or function paired with dump_item::enum_val; the dump_item
+    // constructors pick *_SET when their `bitset` argument is true, *_EQ otherwise
+    DIT_UI8_ENUM_EQ,
+    DIT_UI8_ENUM_SET,
+    DIT_UI16_ENUM_EQ,
+    DIT_UI16_ENUM_SET,
+    DIT_UI32_ENUM_EQ,
+    DIT_UI32_ENUM_SET,
+    DIT_INT_ENUM_FUNCTION_EQ,
+    DIT_INT_ENUM_FUNCTION_SET,
+
+    // Member functions taking a TStringBuf; the string is stored in dump_item::the_buf
+    DIT_BOOL_FUNC_FIXED_STR,
+    DIT_UI8_FUNC_FIXED_STR,
+    DIT_UI16_FUNC_FIXED_STR,
+    DIT_UI32_FUNC_FIXED_STR,
+    DIT_I64_FUNC_FIXED_STR,
+    DIT_UI64_FUNC_FIXED_STR,
+    DIT_FLOAT_FUNC_FIXED_STR,
+    DIT_DOUBLE_FUNC_FIXED_STR,
+
+    DIT_RESOLVE_BY_NAME, //new - for external functions
+
+    DIT_LOCAL_VARIABLE
+};
+
+// Tells whether an item of the given kind is treated as a string: string
+// fields, string constants, both string-returning function kinds and
+// resolve-by-name items.
+inline bool IsStringType(EDumpItemType type) {
+    switch (type) {
+        case DIT_STRING_FIELD:
+        case DIT_STR_CONST:
+        case DIT_STR_FUNCTION:
+        case DIT_STRBUF_FUNCTION:
+        case DIT_RESOLVE_BY_NAME:
+            return true;
+        default:
+            return false;
+    }
+}
+
+// Empty placeholder class: the pointer-to-member typedefs below are declared
+// against it rather than against a concrete record type.
+struct fake {};
+
+struct calc_op;
+
+// Const member functions without arguments.
+typedef int (fake::*int_fn_t)() const;
+typedef float (fake::*float_fn_t)() const;
+typedef bool (fake::*bool_fn_t)() const;
+typedef ui16 (fake::*ui16_fn_t)() const;
+typedef ui32 (fake::*ui32_fn_t)() const;
+// Const member functions that take a string argument.
+typedef bool (fake::*bool_strbuf_fn_t)(const TStringBuf&) const;     // string -> bool
+typedef ui8 (fake::*ui8_strbuf_fn_t)(const TStringBuf&) const;       // string -> ui8
+typedef ui16 (fake::*ui16_strbuf_fn_t)(const TStringBuf&) const;     // string -> ui16
+typedef ui32 (fake::*ui32_strbuf_fn_t)(const TStringBuf&) const;     // string -> ui32
+typedef i64 (fake::*i64_strbuf_fn_t)(const TStringBuf&) const;       // string -> i64
+typedef ui64 (fake::*ui64_strbuf_fn_t)(const TStringBuf&) const;     // string -> ui64
+typedef float (fake::*float_strbuf_fn_t)(const TStringBuf&) const;   // string -> float
+typedef double (fake::*double_strbuf_fn_t)(const TStringBuf&) const; // string -> double
+// Const member functions that return a C string.
+typedef const char* (fake::*str_fn_t)() const;
+typedef const char* (fake::*strbuf_2_fn_t)(TString& buf, const char* nul) const;
+typedef TStringBuf (fake::*resolve_fn_t)(const TStringBuf&) const; // string -> string, $var -> "value"
+
+// Free functions that receive the record pointer explicitly.
+// note: we can not reuse the above signatures, calling conventions may differ
+typedef ui8 (*ui8_ext_fn_t)(const fake*);
+typedef ui16 (*ui16_ext_fn_t)(const fake*);
+typedef ui32 (*ui32_ext_fn_t)(const fake*);
+typedef ui64 (*ui64_ext_fn_t)(const fake*);
+
+// Describes one value the calculator can evaluate or print: a record field,
+// a constant, an accessor function, an enum test or an intermediate math
+// result. `type` tells which union member (if any) is meaningful.
+//
+// arr_ind, arr_length and enum_val have default initializers because most
+// constructors do not set them, while is_array_field() reads arr_length for
+// every item that is_field() accepts (including accessor functions).
+struct dump_item {
+    EDumpItemType type;
+    int pack_id = 0;
+
+    union {
+        // fields
+        // Stored from the pointer passed to the constructor; presumably an
+        // offset inside the record encoded as a pointer - TODO confirm.
+        intptr_t field_offset;
+
+        // constants
+        long long_const;
+        float float_const;
+
+        // functions
+        int_fn_t int_fn;
+        float_fn_t float_fn;
+        bool_fn_t bool_fn;
+        str_fn_t str_fn;
+        strbuf_2_fn_t strbuf_2_fn;
+        resolve_fn_t resolve_fn;
+
+        bool_strbuf_fn_t bool_strbuf_fn;
+        ui8_strbuf_fn_t ui8_strbuf_fn;
+        ui16_strbuf_fn_t ui16_strbuf_fn;
+        ui32_strbuf_fn_t ui32_strbuf_fn;
+        i64_strbuf_fn_t i64_strbuf_fn;
+        ui64_strbuf_fn_t ui64_strbuf_fn;
+        float_strbuf_fn_t float_strbuf_fn;
+        double_strbuf_fn_t double_strbuf_fn;
+
+        ui8_ext_fn_t ui8_ext_fn;
+        ui16_ext_fn_t ui16_ext_fn;
+        ui32_ext_fn_t ui32_ext_fn;
+        ui64_ext_fn_t ui64_ext_fn;
+
+        // enum
+        int_fn_t int_enum_fn;
+
+        // for DIT_MATH_RESULT
+        const calc_op* op;
+    };
+
+    // for enum
+    ui32 enum_val = 0;
+
+    // for local vars, also used to mark accessor functions to use them in dump
+    const char* local_var_name = nullptr;
+
+    int arr_ind = 0; // externally initialized!
+    int arr_length = 0; // > 0 marks an array field, see is_array_field()
+
+    mutable TString the_buf; // buffer for string function, string constants also here
+
+    // Ctors
+    dump_item()
+        : type(DIT_FAKE_ITEM)
+        , field_offset(0)
+    {
+    }
+
+    // Record fields; arrlen > 0 declares an array field of that length.
+    dump_item(bool* ptr, int arrlen = 0)
+        : type(DIT_BOOL_FIELD)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , arr_length(arrlen)
+    {
+    }
+    dump_item(ui8* ptr, int arrlen = 0)
+        : type(DIT_UI8_FIELD)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , arr_length(arrlen)
+    {
+    }
+    dump_item(ui16* ptr, int arrlen = 0)
+        : type(DIT_UI16_FIELD)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , arr_length(arrlen)
+    {
+    }
+    dump_item(ui32* ptr, int arrlen = 0)
+        : type(DIT_UI32_FIELD)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , arr_length(arrlen)
+    {
+    }
+    dump_item(i64* ptr, int arrlen = 0)
+        : type(DIT_I64_FIELD)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , arr_length(arrlen)
+    {
+    }
+    dump_item(ui64* ptr, int arrlen = 0)
+        : type(DIT_UI64_FIELD)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , arr_length(arrlen)
+    {
+    }
+    dump_item(float* ptr, int arrlen = 0)
+        : type(DIT_FLOAT_FIELD)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , arr_length(arrlen)
+    {
+    }
+    dump_item(double* ptr, int arrlen = 0)
+        : type(DIT_DOUBLE_FIELD)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , arr_length(arrlen)
+    {
+    }
+    dump_item(time_t32* ptr, int arrlen = 0)
+        : type(DIT_TIME_T32_FIELD)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , arr_length(arrlen)
+    {
+    }
+    dump_item(pf16ui32* ptr, int arrlen = 0)
+        : type(DIT_PF16UI32_FIELD)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , arr_length(arrlen)
+    {
+    }
+    dump_item(pf16float* ptr, int arrlen = 0)
+        : type(DIT_PF16FLOAT_FIELD)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , arr_length(arrlen)
+    {
+    }
+    dump_item(sf16float* ptr, int arrlen = 0)
+        : type(DIT_SF16FLOAT_FIELD)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , arr_length(arrlen)
+    {
+    }
+    dump_item(char* ptr, int arrlen = 0)
+        : type(DIT_STRING_FIELD)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , arr_length(arrlen)
+    {
+    }
+
+    // Constants
+    dump_item(long val)
+        : type(DIT_LONG_CONST)
+        , long_const(val)
+    {
+    }
+    dump_item(float val)
+        : type(DIT_FLOAT_CONST)
+        , float_const(val)
+    {
+    }
+    dump_item(TString& val)
+        : type(DIT_STR_CONST)
+        , the_buf(val)
+    {
+    }
+
+    // Member functions without arguments
+    dump_item(int_fn_t fn)
+        : type(DIT_INT_FUNCTION)
+        , int_fn(fn)
+    {
+    }
+    dump_item(float_fn_t fn)
+        : type(DIT_FLOAT_FUNCTION)
+        , float_fn(fn)
+    {
+    }
+    dump_item(bool_fn_t fn)
+        : type(DIT_BOOL_FUNCTION)
+        , bool_fn(fn)
+    {
+    }
+
+    // Member functions with a fixed string argument; `name` is kept in the_buf
+    dump_item(bool_strbuf_fn_t fn, const char* name)
+        : type(DIT_BOOL_FUNC_FIXED_STR)
+        , bool_strbuf_fn(fn)
+        , the_buf(name)
+    {
+    }
+    dump_item(ui8_strbuf_fn_t fn, const char* name)
+        : type(DIT_UI8_FUNC_FIXED_STR)
+        , ui8_strbuf_fn(fn)
+        , the_buf(name)
+    {
+    }
+    dump_item(ui16_strbuf_fn_t fn, const char* name)
+        : type(DIT_UI16_FUNC_FIXED_STR)
+        , ui16_strbuf_fn(fn)
+        , the_buf(name)
+    {
+    }
+    dump_item(ui32_strbuf_fn_t fn, const char* name)
+        : type(DIT_UI32_FUNC_FIXED_STR)
+        , ui32_strbuf_fn(fn)
+        , the_buf(name)
+    {
+    }
+    dump_item(i64_strbuf_fn_t fn, const char* name)
+        : type(DIT_I64_FUNC_FIXED_STR)
+        , i64_strbuf_fn(fn)
+        , the_buf(name)
+    {
+    }
+    dump_item(ui64_strbuf_fn_t fn, const char* name)
+        : type(DIT_UI64_FUNC_FIXED_STR)
+        , ui64_strbuf_fn(fn)
+        , the_buf(name)
+    {
+    }
+    dump_item(float_strbuf_fn_t fn, const char* name)
+        : type(DIT_FLOAT_FUNC_FIXED_STR)
+        , float_strbuf_fn(fn)
+        , the_buf(name)
+    {
+    }
+    dump_item(double_strbuf_fn_t fn, const char* name)
+        : type(DIT_DOUBLE_FUNC_FIXED_STR)
+        , double_strbuf_fn(fn)
+        , the_buf(name)
+    {
+    }
+    dump_item(str_fn_t fn)
+        : type(DIT_STR_FUNCTION)
+        , str_fn(fn)
+    {
+    }
+    dump_item(strbuf_2_fn_t fn)
+        : type(DIT_STRBUF_FUNCTION)
+        , strbuf_2_fn(fn)
+    {
+    }
+
+    // Free functions taking the record pointer; a non-null `lvn` marks the
+    // item as an accessor function (see is_accessor_func()).
+    dump_item(ui8_ext_fn_t fn, const char* lvn = nullptr)
+        : type(DIT_UI8_EXT_FUNCTION)
+        , ui8_ext_fn(fn)
+        , local_var_name(lvn)
+    {
+    }
+    dump_item(ui16_ext_fn_t fn, const char* lvn = nullptr)
+        : type(DIT_UI16_EXT_FUNCTION)
+        , ui16_ext_fn(fn)
+        , local_var_name(lvn)
+    {
+    }
+    dump_item(ui32_ext_fn_t fn, const char* lvn = nullptr)
+        : type(DIT_UI32_EXT_FUNCTION)
+        , ui32_ext_fn(fn)
+        , local_var_name(lvn)
+    {
+    }
+    dump_item(ui64_ext_fn_t fn, const char* lvn = nullptr)
+        : type(DIT_UI64_EXT_FUNCTION)
+        , ui64_ext_fn(fn)
+        , local_var_name(lvn)
+    {
+    }
+
+    // Enum tests: `bitset` selects the *_SET kind, otherwise the *_EQ kind
+    dump_item(ui8* ptr, ui32 val, bool bitset)
+        : type(bitset ? DIT_UI8_ENUM_SET : DIT_UI8_ENUM_EQ)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , enum_val(val)
+    {
+    }
+
+    dump_item(ui16* ptr, ui32 val, bool bitset)
+        : type(bitset ? DIT_UI16_ENUM_SET : DIT_UI16_ENUM_EQ)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , enum_val(val)
+    {
+    }
+
+    dump_item(ui32* ptr, ui32 val, bool bitset)
+        : type(bitset ? DIT_UI32_ENUM_SET : DIT_UI32_ENUM_EQ)
+        , field_offset(reinterpret_cast<intptr_t>(ptr))
+        , enum_val(val)
+    {
+    }
+
+    dump_item(int_fn_t fn, ui32 val, bool bitset)
+        : type(bitset ? DIT_INT_ENUM_FUNCTION_SET : DIT_INT_ENUM_FUNCTION_EQ)
+        , int_enum_fn(fn)
+        , enum_val(val)
+    {
+    }
+
+    dump_item(resolve_fn_t fn, const char* name)
+        : type(DIT_RESOLVE_BY_NAME)
+        , resolve_fn(fn)
+        , the_buf(name)
+    {
+    } //name of variable saved in the_buf
+
+    // Functions
+    template <class TOut> // implemented for FILE*, TString* (appends) and IOutputStream*
+    void print(TOut* p, const char** dd) const;
+    TStringBuf GetStrBuf(const char** dd) const; // for char-types only!
+    eval_res_type eval(const char** dd) const;
+    void set_arrind(int arrind);
+    void rewrite_op(const calc_op* ops);
+
+    // A function item that was given a local variable name.
+    bool is_accessor_func() const {
+        return type >= DIT_INT_FUNCTION && type <= DIT_UI64_EXT_FUNCTION && local_var_name;
+    }
+
+    // A real record field (type strictly between the DIT_FIELDS_* sentinels)
+    // or an accessor function.
+    bool is_field() const {
+        return (type > DIT_FIELDS_START && type < DIT_FIELDS_END) || is_accessor_func();
+    }
+
+    bool is_array_field() const {
+        return is_field() && arr_length > 0;
+    }
+};
+
+// named_dump_item
+// A dump_item together with the name it is printed under; the field tables
+// returned by get_named_dump_items<T>() are arrays of these.
+struct named_dump_item {
+    const char* name; // printed with "%s" and streamed as-is by the dump code
+    dump_item item;
+};
diff --git a/library/cpp/fieldcalc/lossy_types.h b/library/cpp/fieldcalc/lossy_types.h
new file mode 100644
index 0000000000..98acfea902
--- /dev/null
+++ b/library/cpp/fieldcalc/lossy_types.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <util/generic/cast.h>
+
+// although target value is float, this thing is only used as unsigned int container
+// Stores an unsigned integer in 16 bits: the value is converted to float and
+// bits 15..30 of its representation are kept (the top bit and the low 15
+// bits are dropped), so the conversion is lossy.
+struct pf16ui32 {
+    ui16 val;
+
+    pf16ui32()
+        : val(0)
+    {
+    }
+
+    void operator=(ui32 t) {
+        const float asFloat = static_cast<float>(t);
+        const ui32 bits = BitCast<ui32>(asFloat);
+        val = static_cast<ui16>(bits >> 15);
+    }
+
+    operator ui32() const {
+        const ui32 bits = (ui32)(val << 15);
+        const float restored = BitCast<float>(bits);
+        return (ui32)restored;
+    }
+};
+
+// unsigned float value
+// Stores a non-negative float in 16 bits by keeping bits 15..30 of its
+// representation; the top (sign) bit and the low 15 bits are dropped.
+struct pf16float {
+    ui16 val;
+
+    pf16float()
+        : val(0)
+    {
+    }
+
+    // Negative input is rejected by the assert.
+    void operator=(float t) {
+        assert(t >= 0.);
+        const ui32 bits = BitCast<ui32>(t);
+        val = static_cast<ui16>(bits >> 15);
+    }
+
+    operator float() const {
+        const ui32 bits = (ui32)(val << 15);
+        return BitCast<float>(bits);
+    }
+};
+
+// signed float value
+// Stores a signed float in 16 bits by keeping the upper 16 bits of its
+// representation (the top bit, i.e. the sign, is preserved); the low 16 bits
+// are dropped, so the conversion is lossy.
+struct sf16float {
+    ui16 val;
+    sf16float()
+        : val(0)
+    {
+    }
+    // Negative values are valid here: unlike pf16float, the sign bit is stored.
+    void operator=(float t) {
+        val = static_cast<ui16>(BitCast<ui32>(t) >> 16);
+    }
+    operator float() const {
+        // Widen before shifting: `val << 16` would be evaluated in signed int
+        // and overflow whenever the stored sign bit is set.
+        return BitCast<float>(static_cast<ui32>(val) << 16);
+    }
+};
+
+// 32-bit signed time value.
+// Not really lossy, should be placed somewhere else.
+typedef i32 time_t32;
diff --git a/library/cpp/fieldcalc/ya.make b/library/cpp/fieldcalc/ya.make
new file mode 100644
index 0000000000..9796592996
--- /dev/null
+++ b/library/cpp/fieldcalc/ya.make
@@ -0,0 +1,13 @@
+# Build description of the fieldcalc library.
+LIBRARY()
+
+# autoarray.h is included by field_calc.h
+PEERDIR(
+    library/cpp/deprecated/autoarray
+)
+
+SRCS(
+    field_calc.cpp
+    lossy_types.h
+    field_calc_int.h
+)
+
+END()