aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp
diff options
context:
space:
mode:
authorasmyasnikov <asmyasnikov@ydb.tech>2024-06-26 17:09:51 +0300
committerasmyasnikov <asmyasnikov@ydb.tech>2024-06-26 17:27:07 +0300
commite25934f4bbe7b98daa362f04861972e8f83066ad (patch)
treeb350932f398fafa6740fe43a529edf700c747270 /contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp
parente6190f5d36aef50e2fec0076c384ba0874f5564c (diff)
downloadydb-e25934f4bbe7b98daa362f04861972e8f83066ad.tar.gz
Added antlr4 to exported contribs into github.com/ydb-platform/ydb
4916444b182c044b7cd4c10f838a37a252ea36cf
Diffstat (limited to 'contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp')
-rw-r--r--contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp501
1 files changed, 501 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp b/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp
new file mode 100644
index 0000000000..d230bf45f6
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/misc/IntervalSet.cpp
@@ -0,0 +1,501 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "misc/MurmurHash.h"
+#include "Lexer.h"
+#include "Exceptions.h"
+#include "Vocabulary.h"
+
+#include "misc/IntervalSet.h"
+
+using namespace antlr4;
+using namespace antlr4::misc;
+
+IntervalSet const IntervalSet::COMPLETE_CHAR_SET =
+ IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE);
+
+IntervalSet const IntervalSet::EMPTY_SET;
+
+IntervalSet::IntervalSet() : _intervals() {
+}
+
+IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() {
+ _intervals = set._intervals;
+}
+
+IntervalSet::IntervalSet(IntervalSet&& set) : IntervalSet(std::move(set._intervals)) {
+}
+
+IntervalSet::IntervalSet(std::vector<Interval>&& intervals) : _intervals(std::move(intervals)) {
+}
+
+IntervalSet& IntervalSet::operator=(const IntervalSet& other) {
+ _intervals = other._intervals;
+ return *this;
+}
+
+IntervalSet& IntervalSet::operator=(IntervalSet&& other) {
+ _intervals = move(other._intervals);
+ return *this;
+}
+
+IntervalSet IntervalSet::of(ssize_t a) {
+ return IntervalSet({ Interval(a, a) });
+}
+
+IntervalSet IntervalSet::of(ssize_t a, ssize_t b) {
+ return IntervalSet({ Interval(a, b) });
+}
+
+void IntervalSet::clear() {
+ _intervals.clear();
+}
+
+void IntervalSet::add(ssize_t el) {
+ add(el, el);
+}
+
+void IntervalSet::add(ssize_t a, ssize_t b) {
+ add(Interval(a, b));
+}
+
+void IntervalSet::add(const Interval &addition) {
+ if (addition.b < addition.a) {
+ return;
+ }
+
+ // find position in list
+ for (auto iterator = _intervals.begin(); iterator != _intervals.end(); ++iterator) {
+ Interval r = *iterator;
+ if (addition == r) {
+ return;
+ }
+
+ if (addition.adjacent(r) || !addition.disjoint(r)) {
+ // next to each other, make a single larger interval
+ Interval bigger = addition.Union(r);
+ *iterator = bigger;
+
+ // make sure we didn't just create an interval that
+ // should be merged with next interval in list
+ while (iterator + 1 != _intervals.end()) {
+ Interval next = *++iterator;
+ if (!bigger.adjacent(next) && bigger.disjoint(next)) {
+ break;
+ }
+
+ // if we bump up against or overlap next, merge
+ iterator = _intervals.erase(iterator);// remove this one
+ --iterator; // move backwards to what we just set
+ *iterator = bigger.Union(next); // set to 3 merged ones
+ // ml: no need to advance iterator, we do that in the next round anyway. ++iterator; // first call to next after previous duplicates the result
+ }
+ return;
+ }
+
+ if (addition.startsBeforeDisjoint(r)) {
+ // insert before r
+ //--iterator;
+ _intervals.insert(iterator, addition);
+ return;
+ }
+
+ // if disjoint and after r, a future iteration will handle it
+ }
+
+ // ok, must be after last interval (and disjoint from last interval)
+ // just add it
+ _intervals.push_back(addition);
+}
+
+IntervalSet IntervalSet::Or(const std::vector<IntervalSet> &sets) {
+ IntervalSet result;
+ for (const auto &s : sets) {
+ result.addAll(s);
+ }
+ return result;
+}
+
+IntervalSet& IntervalSet::addAll(const IntervalSet &set) {
+ // walk set and add each interval
+ for (auto const& interval : set._intervals) {
+ add(interval);
+ }
+ return *this;
+}
+
+IntervalSet IntervalSet::complement(ssize_t minElement, ssize_t maxElement) const {
+ return complement(IntervalSet::of(minElement, maxElement));
+}
+
+IntervalSet IntervalSet::complement(const IntervalSet &vocabulary) const {
+ return vocabulary.subtract(*this);
+}
+
+IntervalSet IntervalSet::subtract(const IntervalSet &other) const {
+ return subtract(*this, other);
+}
+
+IntervalSet IntervalSet::subtract(const IntervalSet &left, const IntervalSet &right) {
+ if (left.isEmpty()) {
+ return IntervalSet();
+ }
+
+ if (right.isEmpty()) {
+ // right set has no elements; just return the copy of the current set
+ return left;
+ }
+
+ IntervalSet result(left);
+ size_t resultI = 0;
+ size_t rightI = 0;
+ while (resultI < result._intervals.size() && rightI < right._intervals.size()) {
+ Interval &resultInterval = result._intervals[resultI];
+ const Interval &rightInterval = right._intervals[rightI];
+
+ // operation: (resultInterval - rightInterval) and update indexes
+
+ if (rightInterval.b < resultInterval.a) {
+ rightI++;
+ continue;
+ }
+
+ if (rightInterval.a > resultInterval.b) {
+ resultI++;
+ continue;
+ }
+
+ Interval beforeCurrent;
+ Interval afterCurrent;
+ if (rightInterval.a > resultInterval.a) {
+ beforeCurrent = Interval(resultInterval.a, rightInterval.a - 1);
+ }
+
+ if (rightInterval.b < resultInterval.b) {
+ afterCurrent = Interval(rightInterval.b + 1, resultInterval.b);
+ }
+
+ if (beforeCurrent.a > -1) { // -1 is the default value
+ if (afterCurrent.a > -1) {
+ // split the current interval into two
+ result._intervals[resultI] = beforeCurrent;
+ result._intervals.insert(result._intervals.begin() + resultI + 1, afterCurrent);
+ resultI++;
+ rightI++;
+ } else {
+ // replace the current interval
+ result._intervals[resultI] = beforeCurrent;
+ resultI++;
+ }
+ } else {
+ if (afterCurrent.a > -1) {
+ // replace the current interval
+ result._intervals[resultI] = afterCurrent;
+ rightI++;
+ } else {
+ // remove the current interval (thus no need to increment resultI)
+ result._intervals.erase(result._intervals.begin() + resultI);
+ }
+ }
+ }
+
+ // If rightI reached right.intervals.size(), no more intervals to subtract from result.
+ // If resultI reached result.intervals.size(), we would be subtracting from an empty set.
+ // Either way, we are done.
+ return result;
+}
+
+IntervalSet IntervalSet::Or(const IntervalSet &a) const {
+ IntervalSet result;
+ result.addAll(*this);
+ result.addAll(a);
+ return result;
+}
+
+IntervalSet IntervalSet::And(const IntervalSet &other) const {
+ IntervalSet intersection;
+ size_t i = 0;
+ size_t j = 0;
+
+ // iterate down both interval lists looking for nondisjoint intervals
+ while (i < _intervals.size() && j < other._intervals.size()) {
+ Interval mine = _intervals[i];
+ Interval theirs = other._intervals[j];
+
+ if (mine.startsBeforeDisjoint(theirs)) {
+ // move this iterator looking for interval that might overlap
+ i++;
+ } else if (theirs.startsBeforeDisjoint(mine)) {
+ // move other iterator looking for interval that might overlap
+ j++;
+ } else if (mine.properlyContains(theirs)) {
+ // overlap, add intersection, get next theirs
+ intersection.add(mine.intersection(theirs));
+ j++;
+ } else if (theirs.properlyContains(mine)) {
+ // overlap, add intersection, get next mine
+ intersection.add(mine.intersection(theirs));
+ i++;
+ } else if (!mine.disjoint(theirs)) {
+ // overlap, add intersection
+ intersection.add(mine.intersection(theirs));
+
+ // Move the iterator of lower range [a..b], but not
+ // the upper range as it may contain elements that will collide
+ // with the next iterator. So, if mine=[0..115] and
+ // theirs=[115..200], then intersection is 115 and move mine
+ // but not theirs as theirs may collide with the next range
+ // in thisIter.
+ // move both iterators to next ranges
+ if (mine.startsAfterNonDisjoint(theirs)) {
+ j++;
+ } else if (theirs.startsAfterNonDisjoint(mine)) {
+ i++;
+ }
+ }
+ }
+
+ return intersection;
+}
+
+
+bool IntervalSet::contains(ssize_t el) const {
+ if (_intervals.empty() || el < _intervals.front().a || el > _intervals.back().b) {
+ return false;
+ }
+
+ return std::binary_search(_intervals.begin(), _intervals.end(), Interval(el, el), [](const Interval &lhs, const Interval &rhs) {
+ return lhs.b < rhs.a;
+ });
+}
+
+bool IntervalSet::isEmpty() const {
+ return _intervals.empty();
+}
+
+ssize_t IntervalSet::getSingleElement() const {
+ if (_intervals.size() == 1) {
+ if (_intervals[0].a == _intervals[0].b) {
+ return _intervals[0].a;
+ }
+ }
+
+ return Token::INVALID_TYPE; // XXX: this value is 0, but 0 is a valid interval range, how can that work?
+}
+
+ssize_t IntervalSet::getMaxElement() const {
+ if (_intervals.empty()) {
+ return Token::INVALID_TYPE;
+ }
+
+ return _intervals.back().b;
+}
+
+ssize_t IntervalSet::getMinElement() const {
+ if (_intervals.empty()) {
+ return Token::INVALID_TYPE;
+ }
+
+ return _intervals.front().a;
+}
+
+std::vector<Interval> const& IntervalSet::getIntervals() const {
+ return _intervals;
+}
+
+size_t IntervalSet::hashCode() const {
+ size_t hash = MurmurHash::initialize();
+ for (const auto &interval : _intervals) {
+ hash = MurmurHash::update(hash, interval.a);
+ hash = MurmurHash::update(hash, interval.b);
+ }
+
+ return MurmurHash::finish(hash, _intervals.size() * 2);
+}
+
+bool IntervalSet::operator == (const IntervalSet &other) const {
+ if (_intervals.empty() && other._intervals.empty())
+ return true;
+
+ if (_intervals.size() != other._intervals.size())
+ return false;
+
+ return std::equal(_intervals.begin(), _intervals.end(), other._intervals.begin());
+}
+
+std::string IntervalSet::toString() const {
+ return toString(false);
+}
+
+std::string IntervalSet::toString(bool elemAreChar) const {
+ if (_intervals.empty()) {
+ return "{}";
+ }
+
+ std::stringstream ss;
+ size_t effectiveSize = size();
+ if (effectiveSize > 1) {
+ ss << "{";
+ }
+
+ bool firstEntry = true;
+ for (const auto &interval : _intervals) {
+ if (!firstEntry)
+ ss << ", ";
+ firstEntry = false;
+
+ ssize_t a = interval.a;
+ ssize_t b = interval.b;
+ if (a == b) {
+ if (a == -1) {
+ ss << "<EOF>";
+ } else if (elemAreChar) {
+ ss << "'" << static_cast<char>(a) << "'";
+ } else {
+ ss << a;
+ }
+ } else {
+ if (elemAreChar) {
+ ss << "'" << static_cast<char>(a) << "'..'" << static_cast<char>(b) << "'";
+ } else {
+ ss << a << ".." << b;
+ }
+ }
+ }
+ if (effectiveSize > 1) {
+ ss << "}";
+ }
+
+ return ss.str();
+}
+
+std::string IntervalSet::toString(const dfa::Vocabulary &vocabulary) const {
+ if (_intervals.empty()) {
+ return "{}";
+ }
+
+ std::stringstream ss;
+ size_t effectiveSize = size();
+ if (effectiveSize > 1) {
+ ss << "{";
+ }
+
+ bool firstEntry = true;
+ for (const auto &interval : _intervals) {
+ if (!firstEntry)
+ ss << ", ";
+ firstEntry = false;
+
+ ssize_t a = interval.a;
+ ssize_t b = interval.b;
+ if (a == b) {
+ ss << elementName(vocabulary, a);
+ } else {
+ for (ssize_t i = a; i <= b; i++) {
+ if (i > a) {
+ ss << ", ";
+ }
+ ss << elementName(vocabulary, i);
+ }
+ }
+ }
+ if (effectiveSize > 1) {
+ ss << "}";
+ }
+
+ return ss.str();
+}
+
+std::string IntervalSet::elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const {
+ if (a == -1) {
+ return "<EOF>";
+ } else if (a == -2) {
+ return "<EPSILON>";
+ } else {
+ return vocabulary.getDisplayName(a);
+ }
+}
+
+size_t IntervalSet::size() const {
+ size_t result = 0;
+ for (const auto &interval : _intervals) {
+ result += size_t(interval.b - interval.a + 1);
+ }
+ return result;
+}
+
+std::vector<ssize_t> IntervalSet::toList() const {
+ std::vector<ssize_t> result;
+ for (const auto &interval : _intervals) {
+ ssize_t a = interval.a;
+ ssize_t b = interval.b;
+ for (ssize_t v = a; v <= b; v++) {
+ result.push_back(v);
+ }
+ }
+ return result;
+}
+
+std::set<ssize_t> IntervalSet::toSet() const {
+ std::set<ssize_t> result;
+ for (const auto &interval : _intervals) {
+ ssize_t a = interval.a;
+ ssize_t b = interval.b;
+ for (ssize_t v = a; v <= b; v++) {
+ result.insert(v);
+ }
+ }
+ return result;
+}
+
+ssize_t IntervalSet::get(size_t i) const {
+ size_t index = 0;
+ for (const auto &interval : _intervals) {
+ ssize_t a = interval.a;
+ ssize_t b = interval.b;
+ for (ssize_t v = a; v <= b; v++) {
+ if (index == i) {
+ return v;
+ }
+ index++;
+ }
+ }
+ return -1;
+}
+
+void IntervalSet::remove(ssize_t el) {
+ for (size_t i = 0; i < _intervals.size(); ++i) {
+ Interval &interval = _intervals[i];
+ ssize_t a = interval.a;
+ ssize_t b = interval.b;
+ if (el < a) {
+ break; // list is sorted and el is before this interval; not here
+ }
+
+ // if whole interval x..x, rm
+ if (el == a && el == b) {
+ _intervals.erase(_intervals.begin() + (long)i);
+ break;
+ }
+ // if on left edge x..b, adjust left
+ if (el == a) {
+ interval.a++;
+ break;
+ }
+ // if on right edge a..x, adjust right
+ if (el == b) {
+ interval.b--;
+ break;
+ }
+ // if in middle a..x..b, split interval
+ if (el > a && el < b) { // found in this interval
+ ssize_t oldb = interval.b;
+ interval.b = el - 1; // [a..x-1]
+ add(el + 1, oldb); // add [x+1..b]
+
+ break; // ml: not in the Java code but I believe we also should stop searching here, as we found x.
+ }
+ }
+}