diff options
author | asmyasnikov <asmyasnikov@ydb.tech> | 2024-06-26 17:09:51 +0300 |
---|---|---|
committer | asmyasnikov <asmyasnikov@ydb.tech> | 2024-06-26 17:27:07 +0300 |
commit | e25934f4bbe7b98daa362f04861972e8f83066ad (patch) | |
tree | b350932f398fafa6740fe43a529edf700c747270 /contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp | |
parent | e6190f5d36aef50e2fec0076c384ba0874f5564c (diff) | |
download | ydb-e25934f4bbe7b98daa362f04861972e8f83066ad.tar.gz |
Added antlr4 to exported contribs into github.com/ydb-platform/ydb
4916444b182c044b7cd4c10f838a37a252ea36cf
Diffstat (limited to 'contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp')
-rw-r--r-- | contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp | 501 |
1 files changed, 501 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp b/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp new file mode 100644 index 0000000000..d230bf45f6 --- /dev/null +++ b/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp @@ -0,0 +1,501 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "Exceptions.h" +#include "Vocabulary.h" + +#include "misc/IntervalSet.h" + +using namespace antlr4; +using namespace antlr4::misc; + +IntervalSet const IntervalSet::COMPLETE_CHAR_SET = + IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE); + +IntervalSet const IntervalSet::EMPTY_SET; + +IntervalSet::IntervalSet() : _intervals() { +} + +IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() { + _intervals = set._intervals; +} + +IntervalSet::IntervalSet(IntervalSet&& set) : IntervalSet(std::move(set._intervals)) { +} + +IntervalSet::IntervalSet(std::vector<Interval>&& intervals) : _intervals(std::move(intervals)) { +} + +IntervalSet& IntervalSet::operator=(const IntervalSet& other) { + _intervals = other._intervals; + return *this; +} + +IntervalSet& IntervalSet::operator=(IntervalSet&& other) { + _intervals = move(other._intervals); + return *this; +} + +IntervalSet IntervalSet::of(ssize_t a) { + return IntervalSet({ Interval(a, a) }); +} + +IntervalSet IntervalSet::of(ssize_t a, ssize_t b) { + return IntervalSet({ Interval(a, b) }); +} + +void IntervalSet::clear() { + _intervals.clear(); +} + +void IntervalSet::add(ssize_t el) { + add(el, el); +} + +void IntervalSet::add(ssize_t a, ssize_t b) { + add(Interval(a, b)); +} + +void IntervalSet::add(const Interval &addition) { + if (addition.b < addition.a) { + return; + } + + // find position in list + for (auto iterator = _intervals.begin(); iterator != _intervals.end(); ++iterator) { + Interval r = *iterator; + if (addition == r) { + return; + } + + if (addition.adjacent(r) || !addition.disjoint(r)) { + // next to each other, make a single larger interval + Interval bigger = addition.Union(r); + *iterator = bigger; + + // make sure we didn't just create an interval that + // should be merged with next interval in list + while (iterator + 1 != _intervals.end()) { + Interval next = *++iterator; + if (!bigger.adjacent(next) && bigger.disjoint(next)) { + break; + } + + // if we bump up against or overlap next, merge + iterator = _intervals.erase(iterator);// remove this one + --iterator; // move backwards to what we just set + *iterator = bigger.Union(next); // set to 3 merged ones + // ml: no need to advance iterator, we do that in the next round anyway. ++iterator; // first call to next after previous duplicates the result + } + return; + } + + if (addition.startsBeforeDisjoint(r)) { + // insert before r + //--iterator; + _intervals.insert(iterator, addition); + return; + } + + // if disjoint and after r, a future iteration will handle it + } + + // ok, must be after last interval (and disjoint from last interval) + // just add it + _intervals.push_back(addition); +} + +IntervalSet IntervalSet::Or(const std::vector<IntervalSet> &sets) { + IntervalSet result; + for (const auto &s : sets) { + result.addAll(s); + } + return result; +} + +IntervalSet& IntervalSet::addAll(const IntervalSet &set) { + // walk set and add each interval + for (auto const& interval : set._intervals) { + add(interval); + } + return *this; +} + +IntervalSet IntervalSet::complement(ssize_t minElement, ssize_t maxElement) const { + return complement(IntervalSet::of(minElement, maxElement)); +} + +IntervalSet IntervalSet::complement(const IntervalSet &vocabulary) const { + return vocabulary.subtract(*this); +} + +IntervalSet IntervalSet::subtract(const IntervalSet &other) const { + return subtract(*this, other); +} + +IntervalSet IntervalSet::subtract(const IntervalSet &left, const IntervalSet &right) { + if (left.isEmpty()) { + return IntervalSet(); + } + + if (right.isEmpty()) { + // right set has no elements; just return the copy of the current set + return left; + } + + IntervalSet result(left); + size_t resultI = 0; + size_t rightI = 0; + while (resultI < result._intervals.size() && rightI < right._intervals.size()) { + Interval &resultInterval = result._intervals[resultI]; + const Interval &rightInterval = right._intervals[rightI]; + + // operation: (resultInterval - rightInterval) and update indexes + + if (rightInterval.b < resultInterval.a) { + rightI++; + continue; + } + + if (rightInterval.a > resultInterval.b) { + resultI++; + continue; + } + + Interval beforeCurrent; + Interval afterCurrent; + if (rightInterval.a > resultInterval.a) { + beforeCurrent = Interval(resultInterval.a, rightInterval.a - 1); + } + + if (rightInterval.b < resultInterval.b) { + afterCurrent = Interval(rightInterval.b + 1, resultInterval.b); + } + + if (beforeCurrent.a > -1) { // -1 is the default value + if (afterCurrent.a > -1) { + // split the current interval into two + result._intervals[resultI] = beforeCurrent; + result._intervals.insert(result._intervals.begin() + resultI + 1, afterCurrent); + resultI++; + rightI++; + } else { + // replace the current interval + result._intervals[resultI] = beforeCurrent; + resultI++; + } + } else { + if (afterCurrent.a > -1) { + // replace the current interval + result._intervals[resultI] = afterCurrent; + rightI++; + } else { + // remove the current interval (thus no need to increment resultI) + result._intervals.erase(result._intervals.begin() + resultI); + } + } + } + + // If rightI reached right.intervals.size(), no more intervals to subtract from result. + // If resultI reached result.intervals.size(), we would be subtracting from an empty set. + // Either way, we are done. + return result; +} + +IntervalSet IntervalSet::Or(const IntervalSet &a) const { + IntervalSet result; + result.addAll(*this); + result.addAll(a); + return result; +} + +IntervalSet IntervalSet::And(const IntervalSet &other) const { + IntervalSet intersection; + size_t i = 0; + size_t j = 0; + + // iterate down both interval lists looking for nondisjoint intervals + while (i < _intervals.size() && j < other._intervals.size()) { + Interval mine = _intervals[i]; + Interval theirs = other._intervals[j]; + + if (mine.startsBeforeDisjoint(theirs)) { + // move this iterator looking for interval that might overlap + i++; + } else if (theirs.startsBeforeDisjoint(mine)) { + // move other iterator looking for interval that might overlap + j++; + } else if (mine.properlyContains(theirs)) { + // overlap, add intersection, get next theirs + intersection.add(mine.intersection(theirs)); + j++; + } else if (theirs.properlyContains(mine)) { + // overlap, add intersection, get next mine + intersection.add(mine.intersection(theirs)); + i++; + } else if (!mine.disjoint(theirs)) { + // overlap, add intersection + intersection.add(mine.intersection(theirs)); + + // Move the iterator of lower range [a..b], but not + // the upper range as it may contain elements that will collide + // with the next iterator. So, if mine=[0..115] and + // theirs=[115..200], then intersection is 115 and move mine + // but not theirs as theirs may collide with the next range + // in thisIter. + // move both iterators to next ranges + if (mine.startsAfterNonDisjoint(theirs)) { + j++; + } else if (theirs.startsAfterNonDisjoint(mine)) { + i++; + } + } + } + + return intersection; +} + + +bool IntervalSet::contains(ssize_t el) const { + if (_intervals.empty() || el < _intervals.front().a || el > _intervals.back().b) { + return false; + } + + return std::binary_search(_intervals.begin(), _intervals.end(), Interval(el, el), [](const Interval &lhs, const Interval &rhs) { + return lhs.b < rhs.a; + }); +} + +bool IntervalSet::isEmpty() const { + return _intervals.empty(); +} + +ssize_t IntervalSet::getSingleElement() const { + if (_intervals.size() == 1) { + if (_intervals[0].a == _intervals[0].b) { + return _intervals[0].a; + } + } + + return Token::INVALID_TYPE; // XXX: this value is 0, but 0 is a valid interval range, how can that work? +} + +ssize_t IntervalSet::getMaxElement() const { + if (_intervals.empty()) { + return Token::INVALID_TYPE; + } + + return _intervals.back().b; +} + +ssize_t IntervalSet::getMinElement() const { + if (_intervals.empty()) { + return Token::INVALID_TYPE; + } + + return _intervals.front().a; +} + +std::vector<Interval> const& IntervalSet::getIntervals() const { + return _intervals; +} + +size_t IntervalSet::hashCode() const { + size_t hash = MurmurHash::initialize(); + for (const auto &interval : _intervals) { + hash = MurmurHash::update(hash, interval.a); + hash = MurmurHash::update(hash, interval.b); + } + + return MurmurHash::finish(hash, _intervals.size() * 2); +} + +bool IntervalSet::operator == (const IntervalSet &other) const { + if (_intervals.empty() && other._intervals.empty()) + return true; + + if (_intervals.size() != other._intervals.size()) + return false; + + return std::equal(_intervals.begin(), _intervals.end(), other._intervals.begin()); +} + +std::string IntervalSet::toString() const { + return toString(false); +} + +std::string IntervalSet::toString(bool elemAreChar) const { + if (_intervals.empty()) { + return "{}"; + } + + std::stringstream ss; + size_t effectiveSize = size(); + if (effectiveSize > 1) { + ss << "{"; + } + + bool firstEntry = true; + for (const auto &interval : _intervals) { + if (!firstEntry) + ss << ", "; + firstEntry = false; + + ssize_t a = interval.a; + ssize_t b = interval.b; + if (a == b) { + if (a == -1) { + ss << "<EOF>"; + } else if (elemAreChar) { + ss << "'" << static_cast<char>(a) << "'"; + } else { + ss << a; + } + } else { + if (elemAreChar) { + ss << "'" << static_cast<char>(a) << "'..'" << static_cast<char>(b) << "'"; + } else { + ss << a << ".." << b; + } + } + } + if (effectiveSize > 1) { + ss << "}"; + } + + return ss.str(); +} + +std::string IntervalSet::toString(const dfa::Vocabulary &vocabulary) const { + if (_intervals.empty()) { + return "{}"; + } + + std::stringstream ss; + size_t effectiveSize = size(); + if (effectiveSize > 1) { + ss << "{"; + } + + bool firstEntry = true; + for (const auto &interval : _intervals) { + if (!firstEntry) + ss << ", "; + firstEntry = false; + + ssize_t a = interval.a; + ssize_t b = interval.b; + if (a == b) { + ss << elementName(vocabulary, a); + } else { + for (ssize_t i = a; i <= b; i++) { + if (i > a) { + ss << ", "; + } + ss << elementName(vocabulary, i); + } + } + } + if (effectiveSize > 1) { + ss << "}"; + } + + return ss.str(); +} + +std::string IntervalSet::elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const { + if (a == -1) { + return "<EOF>"; + } else if (a == -2) { + return "<EPSILON>"; + } else { + return vocabulary.getDisplayName(a); + } +} + +size_t IntervalSet::size() const { + size_t result = 0; + for (const auto &interval : _intervals) { + result += size_t(interval.b - interval.a + 1); + } + return result; +} + +std::vector<ssize_t> IntervalSet::toList() const { + std::vector<ssize_t> result; + for (const auto &interval : _intervals) { + ssize_t a = interval.a; + ssize_t b = interval.b; + for (ssize_t v = a; v <= b; v++) { + result.push_back(v); + } + } + return result; +} + +std::set<ssize_t> IntervalSet::toSet() const { + std::set<ssize_t> result; + for (const auto &interval : _intervals) { + ssize_t a = interval.a; + ssize_t b = interval.b; + for (ssize_t v = a; v <= b; v++) { + result.insert(v); + } + } + return result; +} + +ssize_t IntervalSet::get(size_t i) const { + size_t index = 0; + for (const auto &interval : _intervals) { + ssize_t a = interval.a; + ssize_t b = interval.b; + for (ssize_t v = a; v <= b; v++) { + if (index == i) { + return v; + } + index++; + } + } + return -1; +} + +void IntervalSet::remove(ssize_t el) { + for (size_t i = 0; i < _intervals.size(); ++i) { + Interval &interval = _intervals[i]; + ssize_t a = interval.a; + ssize_t b = interval.b; + if (el < a) { + break; // list is sorted and el is before this interval; not here + } + + // if whole interval x..x, rm + if (el == a && el == b) { + _intervals.erase(_intervals.begin() + (long)i); + break; + } + // if on left edge x..b, adjust left + if (el == a) { + interval.a++; + break; + } + // if on right edge a..x, adjust right + if (el == b) { + interval.b--; + break; + } + // if in middle a..x..b, split interval + if (el > a && el < b) { // found in this interval + ssize_t oldb = interval.b; + interval.b = el - 1; // [a..x-1] + add(el + 1, oldb); // add [x+1..b] + + break; // ml: not in the Java code but I believe we also should stop searching here, as we found x. + } + } +} |