about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author robot-contrib <robot-contrib@yandex-team.com> 2024-05-17 12:05:29 +0300
committer robot-contrib <robot-contrib@yandex-team.com> 2024-05-17 12:21:18 +0300
commit 4a008c0eb2019067beb27c6118db587e96679ec5 (patch)
tree 313d6380b47866f294f10acfaeccd0407b9cb8bc
parent 2bd98b5c8ddd404f62db9f74144a763de01341cc (diff)
download ydb-4a008c0eb2019067beb27c6118db587e96679ec5.tar.gz
Update contrib/libs/apache/orc to 2.0.1
855bd01a1b384d08c59ee86c6b493ea32190ef19
-rw-r--r-- contrib/libs/apache/orc/c++/include/orc/OrcFile.hh 4
-rw-r--r-- contrib/libs/apache/orc/c++/include/orc/orc-config.hh 2
-rw-r--r-- contrib/libs/apache/orc/c++/src/Adaptor-linux.hh 1
-rw-r--r-- contrib/libs/apache/orc/c++/src/Adaptor.cc 16
-rw-r--r-- contrib/libs/apache/orc/c++/src/Timezone.cc 86
-rw-r--r-- contrib/libs/apache/orc/c++/src/sargs/SargsApplier.cc 8
-rw-r--r-- contrib/libs/apache/orc/ya.make 4
7 files changed, 95 insertions, 26 deletions
diff --git a/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh b/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh
index 6e4a07bf7c..a9ad692d42 100644
--- a/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh
@@ -127,8 +127,8 @@ namespace orc {
* @param path the uri of the file in HDFS
* @param metrics the metrics of the reader
*/
- std::unique_ptr<InputStream> readHdfsFile(const std::string& path,
- ReaderMetrics* metrics = nullptr);
+ [[deprecated("readHdfsFile is deprecated in 2.0.1")]] std::unique_ptr<InputStream> readHdfsFile(
+ const std::string& path, ReaderMetrics* metrics = nullptr);
/**
* Create a reader to read the ORC file.
diff --git a/contrib/libs/apache/orc/c++/include/orc/orc-config.hh b/contrib/libs/apache/orc/c++/include/orc/orc-config.hh
index ab1e16fa15..eb79be002a 100644
--- a/contrib/libs/apache/orc/c++/include/orc/orc-config.hh
+++ b/contrib/libs/apache/orc/c++/include/orc/orc-config.hh
@@ -19,7 +19,7 @@
#ifndef ORC_CONFIG_HH
#define ORC_CONFIG_HH
-#define ORC_VERSION "2.0.0"
+#define ORC_VERSION "2.0.1"
#define ORC_CXX_HAS_CSTDINT
diff --git a/contrib/libs/apache/orc/c++/src/Adaptor-linux.hh b/contrib/libs/apache/orc/c++/src/Adaptor-linux.hh
index b11cdf74cd..286188e3a1 100644
--- a/contrib/libs/apache/orc/c++/src/Adaptor-linux.hh
+++ b/contrib/libs/apache/orc/c++/src/Adaptor-linux.hh
@@ -109,6 +109,7 @@ typedef SSIZE_T ssize_t;
namespace orc {
std::string to_string(double val);
std::string to_string(int64_t val);
+ bool fileExists(const char* path);
}
#ifdef HAS_BUILTIN_OVERFLOW_CHECK
diff --git a/contrib/libs/apache/orc/c++/src/Adaptor.cc b/contrib/libs/apache/orc/c++/src/Adaptor.cc
index d9390131b6..d5dd7802f3 100644
--- a/contrib/libs/apache/orc/c++/src/Adaptor.cc
+++ b/contrib/libs/apache/orc/c++/src/Adaptor.cc
@@ -53,6 +53,12 @@ ssize_t pread(int fd, void* buf, size_t size, off_t offset) {
#endif
#endif
+#ifdef _MSC_VER
+#include <Windows.h>
+#else
+#include <sys/stat.h>
+#endif
+
namespace orc {
#ifdef HAS_DOUBLE_TO_STRING
std::string to_string(double val) {
@@ -73,4 +79,14 @@ namespace orc {
return std::to_string(static_cast<long long int>(val));
}
#endif
+
+ bool fileExists(const char* path) {
+#ifdef _MSC_VER
+ return GetFileAttributesA(path) != INVALID_FILE_ATTRIBUTES;
+#else
+ struct stat st;
+ return stat(path, &st) == 0;
+#endif
+ }
+
} // namespace orc
diff --git a/contrib/libs/apache/orc/c++/src/Timezone.cc b/contrib/libs/apache/orc/c++/src/Timezone.cc
index 27e14480d5..4c78a53a29 100644
--- a/contrib/libs/apache/orc/c++/src/Timezone.cc
+++ b/contrib/libs/apache/orc/c++/src/Timezone.cc
@@ -24,7 +24,7 @@
#include <stdlib.h>
#include <string.h>
#include <time.h>
-#include <filesystem>
+#include <atomic>
#include <map>
#include <sstream>
@@ -656,26 +656,25 @@ namespace orc {
epoch = utcEpoch - getVariant(utcEpoch).gmtOffset;
}
- const char* getTimezoneDirectory() {
+ std::string getTimezoneDirectory() {
const char* dir = getenv("TZDIR");
if (!dir) {
- dir = DEFAULT_TZDIR;
+ // this is present if we're in an activated conda environment
+ const char* condaPrefix = getenv("CONDA_PREFIX");
+ if (condaPrefix) {
+ std::string condaDir(condaPrefix);
+ condaDir += "/share/zoneinfo";
+ return condaDir;
+ } else {
+ dir = DEFAULT_TZDIR;
+ }
}
return dir;
}
- /**
- * Get a timezone by absolute filename.
- * Results are cached.
- */
- const Timezone& getTimezoneByFilename(const std::string& filename) {
- // ORC-110
- std::lock_guard<std::mutex> timezone_lock(timezone_mutex);
- std::map<std::string, std::shared_ptr<Timezone> >::iterator itr = timezoneCache.find(filename);
- if (itr != timezoneCache.end()) {
- return *(itr->second).get();
- }
- if (!std::filesystem::exists(std::filesystem::path(filename))) {
+ static std::vector<unsigned char> loadTZDB(const std::string& filename) {
+ std::vector<unsigned char> buffer;
+ if (!fileExists(filename.c_str())) {
std::stringstream ss;
ss << "Time zone file " << filename << " does not exist."
<< " Please install IANA time zone database and set TZDIR env.";
@@ -684,12 +683,65 @@ namespace orc {
try {
std::unique_ptr<InputStream> file = readFile(filename);
size_t size = static_cast<size_t>(file->getLength());
- std::vector<unsigned char> buffer(size);
+ buffer.resize(size);
file->read(&buffer[0], size, 0);
- timezoneCache[filename] = std::make_shared<TimezoneImpl>(filename, buffer);
} catch (ParseError& err) {
throw TimezoneError(err.what());
}
+ return buffer;
+ }
+
+ class LazyTimezone : public Timezone {
+ private:
+ std::string filename_;
+ mutable std::unique_ptr<TimezoneImpl> impl_;
+ mutable std::once_flag initialized_;
+
+ TimezoneImpl* getImpl() const {
+ std::call_once(initialized_, [&]() {
+ auto buffer = loadTZDB(filename_);
+ impl_ = std::make_unique<TimezoneImpl>(filename_, std::move(buffer));
+ });
+ return impl_.get();
+ }
+
+ public:
+ LazyTimezone(const std::string& filename) : filename_(filename) {}
+
+ const TimezoneVariant& getVariant(int64_t clk) const override {
+ return getImpl()->getVariant(clk);
+ }
+ int64_t getEpoch() const override {
+ return getImpl()->getEpoch();
+ }
+ void print(std::ostream& os) const override {
+ return getImpl()->print(os);
+ }
+ uint64_t getVersion() const override {
+ return getImpl()->getVersion();
+ }
+
+ int64_t convertToUTC(int64_t clk) const override {
+ return getImpl()->convertToUTC(clk);
+ }
+
+ int64_t convertFromUTC(int64_t clk) const override {
+ return getImpl()->convertFromUTC(clk);
+ }
+ };
+
+ /**
+ * Get a timezone by absolute filename.
+ * Results are cached.
+ */
+ const Timezone& getTimezoneByFilename(const std::string& filename) {
+ // ORC-110
+ std::lock_guard<std::mutex> timezone_lock(timezone_mutex);
+ std::map<std::string, std::shared_ptr<Timezone> >::iterator itr = timezoneCache.find(filename);
+ if (itr != timezoneCache.end()) {
+ return *(itr->second).get();
+ }
+ timezoneCache[filename] = std::make_shared<LazyTimezone>(filename);
return *timezoneCache[filename].get();
}
diff --git a/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.cc b/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.cc
index 7032a88126..0e369bf453 100644
--- a/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.cc
+++ b/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.cc
@@ -160,12 +160,12 @@ namespace orc {
}
bool ret = evaluateColumnStatistics(stripeStats.col_stats());
+ if (mMetrics != nullptr) {
+ mMetrics->EvaluatedRowGroupCount.fetch_add(stripeRowGroupCount);
+ }
if (!ret) {
// reset mNextSkippedRows when the current stripe does not satisfy the PPD
mNextSkippedRows.clear();
- if (mMetrics != nullptr) {
- mMetrics->EvaluatedRowGroupCount.fetch_add(stripeRowGroupCount);
- }
}
return ret;
}
@@ -177,7 +177,7 @@ namespace orc {
mFileStatsEvalResult = true;
} else {
mFileStatsEvalResult = evaluateColumnStatistics(footer.statistics());
- if (!mFileStatsEvalResult && mMetrics != nullptr) {
+ if (mMetrics != nullptr) {
mMetrics->EvaluatedRowGroupCount.fetch_add(numRowGroupsInStripeRange);
}
}
diff --git a/contrib/libs/apache/orc/ya.make b/contrib/libs/apache/orc/ya.make
index ec4d745340..da0621ed3f 100644
--- a/contrib/libs/apache/orc/ya.make
+++ b/contrib/libs/apache/orc/ya.make
@@ -6,9 +6,9 @@ LICENSE(Apache-2.0)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-VERSION(2.0.0)
+VERSION(2.0.1)
-ORIGINAL_SOURCE(https://github.com/apache/orc/archive/rel/release-2.0.0.tar.gz)
+ORIGINAL_SOURCE(https://github.com/apache/orc/archive/rel/release-2.0.1.tar.gz)
PEERDIR(
contrib/libs/apache/orc-format