diff options
author | robot-contrib <robot-contrib@yandex-team.com> | 2024-05-17 12:05:29 +0300 |
---|---|---|
committer | robot-contrib <robot-contrib@yandex-team.com> | 2024-05-17 12:21:18 +0300 |
commit | 4a008c0eb2019067beb27c6118db587e96679ec5 (patch) | |
tree | 313d6380b47866f294f10acfaeccd0407b9cb8bc | |
parent | 2bd98b5c8ddd404f62db9f74144a763de01341cc (diff) | |
download | ydb-4a008c0eb2019067beb27c6118db587e96679ec5.tar.gz |
Update contrib/libs/apache/orc to 2.0.1
855bd01a1b384d08c59ee86c6b493ea32190ef19
-rw-r--r-- | contrib/libs/apache/orc/c++/include/orc/OrcFile.hh | 4 | ||||
-rw-r--r-- | contrib/libs/apache/orc/c++/include/orc/orc-config.hh | 2 | ||||
-rw-r--r-- | contrib/libs/apache/orc/c++/src/Adaptor-linux.hh | 1 | ||||
-rw-r--r-- | contrib/libs/apache/orc/c++/src/Adaptor.cc | 16 | ||||
-rw-r--r-- | contrib/libs/apache/orc/c++/src/Timezone.cc | 86 | ||||
-rw-r--r-- | contrib/libs/apache/orc/c++/src/sargs/SargsApplier.cc | 8 | ||||
-rw-r--r-- | contrib/libs/apache/orc/ya.make | 4 |
7 files changed, 95 insertions, 26 deletions
diff --git a/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh b/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh index 6e4a07bf7c..a9ad692d42 100644 --- a/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh +++ b/contrib/libs/apache/orc/c++/include/orc/OrcFile.hh @@ -127,8 +127,8 @@ namespace orc { * @param path the uri of the file in HDFS * @param metrics the metrics of the reader */ - std::unique_ptr<InputStream> readHdfsFile(const std::string& path, - ReaderMetrics* metrics = nullptr); + [[deprecated("readHdfsFile is deprecated in 2.0.1")]] std::unique_ptr<InputStream> readHdfsFile( + const std::string& path, ReaderMetrics* metrics = nullptr); /** * Create a reader to read the ORC file. diff --git a/contrib/libs/apache/orc/c++/include/orc/orc-config.hh b/contrib/libs/apache/orc/c++/include/orc/orc-config.hh index ab1e16fa15..eb79be002a 100644 --- a/contrib/libs/apache/orc/c++/include/orc/orc-config.hh +++ b/contrib/libs/apache/orc/c++/include/orc/orc-config.hh @@ -19,7 +19,7 @@ #ifndef ORC_CONFIG_HH #define ORC_CONFIG_HH -#define ORC_VERSION "2.0.0" +#define ORC_VERSION "2.0.1" #define ORC_CXX_HAS_CSTDINT diff --git a/contrib/libs/apache/orc/c++/src/Adaptor-linux.hh b/contrib/libs/apache/orc/c++/src/Adaptor-linux.hh index b11cdf74cd..286188e3a1 100644 --- a/contrib/libs/apache/orc/c++/src/Adaptor-linux.hh +++ b/contrib/libs/apache/orc/c++/src/Adaptor-linux.hh @@ -109,6 +109,7 @@ typedef SSIZE_T ssize_t; namespace orc { std::string to_string(double val); std::string to_string(int64_t val); + bool fileExists(const char* path); } #ifdef HAS_BUILTIN_OVERFLOW_CHECK diff --git a/contrib/libs/apache/orc/c++/src/Adaptor.cc b/contrib/libs/apache/orc/c++/src/Adaptor.cc index d9390131b6..d5dd7802f3 100644 --- a/contrib/libs/apache/orc/c++/src/Adaptor.cc +++ b/contrib/libs/apache/orc/c++/src/Adaptor.cc @@ -53,6 +53,12 @@ ssize_t pread(int fd, void* buf, size_t size, off_t offset) { #endif #endif +#ifdef _MSC_VER +#include <Windows.h> +#else +#include <sys/stat.h> +#endif + namespace orc { #ifdef HAS_DOUBLE_TO_STRING std::string to_string(double val) { @@ -73,4 +79,14 @@ namespace orc { return std::to_string(static_cast<long long int>(val)); } #endif + + bool fileExists(const char* path) { +#ifdef _MSC_VER + return GetFileAttributesA(path) != INVALID_FILE_ATTRIBUTES; +#else + struct stat st; + return stat(path, &st) == 0; +#endif + } + } // namespace orc diff --git a/contrib/libs/apache/orc/c++/src/Timezone.cc b/contrib/libs/apache/orc/c++/src/Timezone.cc index 27e14480d5..4c78a53a29 100644 --- a/contrib/libs/apache/orc/c++/src/Timezone.cc +++ b/contrib/libs/apache/orc/c++/src/Timezone.cc @@ -24,7 +24,7 @@ #include <stdlib.h> #include <string.h> #include <time.h> -#include <filesystem> +#include <atomic> #include <map> #include <sstream> @@ -656,26 +656,25 @@ namespace orc { epoch = utcEpoch - getVariant(utcEpoch).gmtOffset; } - const char* getTimezoneDirectory() { + std::string getTimezoneDirectory() { const char* dir = getenv("TZDIR"); if (!dir) { - dir = DEFAULT_TZDIR; + // this is present if we're in an activated conda environment + const char* condaPrefix = getenv("CONDA_PREFIX"); + if (condaPrefix) { + std::string condaDir(condaPrefix); + condaDir += "/share/zoneinfo"; + return condaDir; + } else { + dir = DEFAULT_TZDIR; + } } return dir; } - /** - * Get a timezone by absolute filename. - * Results are cached. - */ - const Timezone& getTimezoneByFilename(const std::string& filename) { - // ORC-110 - std::lock_guard<std::mutex> timezone_lock(timezone_mutex); - std::map<std::string, std::shared_ptr<Timezone> >::iterator itr = timezoneCache.find(filename); - if (itr != timezoneCache.end()) { - return *(itr->second).get(); - } - if (!std::filesystem::exists(std::filesystem::path(filename))) { + static std::vector<unsigned char> loadTZDB(const std::string& filename) { + std::vector<unsigned char> buffer; + if (!fileExists(filename.c_str())) { std::stringstream ss; ss << "Time zone file " << filename << " does not exist." << " Please install IANA time zone database and set TZDIR env."; @@ -684,12 +683,65 @@ namespace orc { try { std::unique_ptr<InputStream> file = readFile(filename); size_t size = static_cast<size_t>(file->getLength()); - std::vector<unsigned char> buffer(size); + buffer.resize(size); file->read(&buffer[0], size, 0); - timezoneCache[filename] = std::make_shared<TimezoneImpl>(filename, buffer); } catch (ParseError& err) { throw TimezoneError(err.what()); } + return buffer; + } + + class LazyTimezone : public Timezone { + private: + std::string filename_; + mutable std::unique_ptr<TimezoneImpl> impl_; + mutable std::once_flag initialized_; + + TimezoneImpl* getImpl() const { + std::call_once(initialized_, [&]() { + auto buffer = loadTZDB(filename_); + impl_ = std::make_unique<TimezoneImpl>(filename_, std::move(buffer)); + }); + return impl_.get(); + } + + public: + LazyTimezone(const std::string& filename) : filename_(filename) {} + + const TimezoneVariant& getVariant(int64_t clk) const override { + return getImpl()->getVariant(clk); + } + int64_t getEpoch() const override { + return getImpl()->getEpoch(); + } + void print(std::ostream& os) const override { + return getImpl()->print(os); + } + uint64_t getVersion() const override { + return getImpl()->getVersion(); + } + + int64_t convertToUTC(int64_t clk) const override { + return getImpl()->convertToUTC(clk); + } + + int64_t convertFromUTC(int64_t clk) const override { + return getImpl()->convertFromUTC(clk); + } + }; + + /** + * Get a timezone by absolute filename. + * Results are cached. + */ + const Timezone& getTimezoneByFilename(const std::string& filename) { + // ORC-110 + std::lock_guard<std::mutex> timezone_lock(timezone_mutex); + std::map<std::string, std::shared_ptr<Timezone> >::iterator itr = timezoneCache.find(filename); + if (itr != timezoneCache.end()) { + return *(itr->second).get(); + } + timezoneCache[filename] = std::make_shared<LazyTimezone>(filename); return *timezoneCache[filename].get(); } diff --git a/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.cc b/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.cc index 7032a88126..0e369bf453 100644 --- a/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.cc +++ b/contrib/libs/apache/orc/c++/src/sargs/SargsApplier.cc @@ -160,12 +160,12 @@ namespace orc { } bool ret = evaluateColumnStatistics(stripeStats.col_stats()); + if (mMetrics != nullptr) { + mMetrics->EvaluatedRowGroupCount.fetch_add(stripeRowGroupCount); + } if (!ret) { // reset mNextSkippedRows when the current stripe does not satisfy the PPD mNextSkippedRows.clear(); - if (mMetrics != nullptr) { - mMetrics->EvaluatedRowGroupCount.fetch_add(stripeRowGroupCount); - } } return ret; } @@ -177,7 +177,7 @@ namespace orc { mFileStatsEvalResult = true; } else { mFileStatsEvalResult = evaluateColumnStatistics(footer.statistics()); - if (!mFileStatsEvalResult && mMetrics != nullptr) { + if (mMetrics != nullptr) { mMetrics->EvaluatedRowGroupCount.fetch_add(numRowGroupsInStripeRange); } } diff --git a/contrib/libs/apache/orc/ya.make b/contrib/libs/apache/orc/ya.make index ec4d745340..da0621ed3f 100644 --- a/contrib/libs/apache/orc/ya.make +++ b/contrib/libs/apache/orc/ya.make @@ -6,9 +6,9 @@ LICENSE(Apache-2.0) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -VERSION(2.0.0) +VERSION(2.0.1) -ORIGINAL_SOURCE(https://github.com/apache/orc/archive/rel/release-2.0.0.tar.gz) +ORIGINAL_SOURCE(https://github.com/apache/orc/archive/rel/release-2.0.1.tar.gz) PEERDIR( contrib/libs/apache/orc-format |