aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/xml/document/xml-textreader.cpp
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/xml/document/xml-textreader.cpp
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/xml/document/xml-textreader.cpp')
-rw-r--r--library/cpp/xml/document/xml-textreader.cpp318
1 files changed, 318 insertions, 0 deletions
diff --git a/library/cpp/xml/document/xml-textreader.cpp b/library/cpp/xml/document/xml-textreader.cpp
new file mode 100644
index 0000000000..b946f1fbf2
--- /dev/null
+++ b/library/cpp/xml/document/xml-textreader.cpp
@@ -0,0 +1,318 @@
+#include "xml-textreader.h"
+
+#include <contrib/libs/libxml/include/libxml/xmlreader.h>
+
+#include <util/generic/yexception.h>
+#include <util/string/strip.h>
+#include <util/system/compiler.h>
+
+namespace NXml {
+ TTextReader::TTextReader(IInputStream& stream, const TOptions& options)
+ : Stream(stream)
+ , IsError(false)
+ {
+ Impl.Reset(xmlReaderForIO(ReadFromInputStreamCallback, nullptr, this, nullptr, nullptr, options.GetMask()));
+
+ if (!Impl) {
+ ythrow yexception() << "cannot instantiate underlying xmlTextReader structure";
+ }
+ SetupErrorHandler();
+ CheckForExceptions();
+ }
+
+ TTextReader::~TTextReader() {
+ }
+
+ bool TTextReader::Read() {
+ return BoolResult(xmlTextReaderRead(Impl.Get()));
+ }
+
+ TString TTextReader::ReadInnerXml() const {
+ return TempStringOrEmptyResult(xmlTextReaderReadInnerXml(Impl.Get()));
+ }
+
+ TString TTextReader::ReadOuterXml() const {
+ return TempStringOrEmptyResult(xmlTextReaderReadOuterXml(Impl.Get()));
+ }
+
+ TString TTextReader::ReadString() const {
+ return TempStringOrEmptyResult(xmlTextReaderReadString(Impl.Get()));
+ }
+
+ bool TTextReader::ReadAttributeValue() const {
+ return BoolResult(xmlTextReaderReadAttributeValue(Impl.Get()));
+ }
+
+ int TTextReader::GetAttributeCount() const {
+ return IntResult(xmlTextReaderAttributeCount(Impl.Get()));
+ }
+
+ TStringBuf TTextReader::GetBaseUri() const {
+ return ConstStringOrEmptyResult(xmlTextReaderConstBaseUri(Impl.Get()));
+ }
+
+ int TTextReader::GetDepth() const {
+ return IntResult(xmlTextReaderDepth(Impl.Get()));
+ }
+
+ bool TTextReader::HasAttributes() const {
+ return BoolResult(xmlTextReaderHasAttributes(Impl.Get()));
+ }
+
+ bool TTextReader::HasValue() const {
+ return BoolResult(xmlTextReaderHasValue(Impl.Get()));
+ }
+
+ bool TTextReader::IsDefault() const {
+ return BoolResult(xmlTextReaderIsDefault(Impl.Get()));
+ }
+
+ bool TTextReader::IsEmptyElement() const {
+ return BoolResult(xmlTextReaderIsEmptyElement(Impl.Get()));
+ }
+
+ TStringBuf TTextReader::GetLocalName() const {
+ return ConstStringOrEmptyResult(xmlTextReaderConstLocalName(Impl.Get()));
+ }
+
+ TStringBuf TTextReader::GetName() const {
+ return ConstStringOrEmptyResult(xmlTextReaderConstName(Impl.Get()));
+ }
+
+ TStringBuf TTextReader::GetNamespaceUri() const {
+ return ConstStringOrEmptyResult(xmlTextReaderConstNamespaceUri(Impl.Get()));
+ }
+
+ TTextReader::ENodeType TTextReader::GetNodeType() const {
+ return static_cast<ENodeType>(IntResult(xmlTextReaderNodeType(Impl.Get())));
+ }
+
+ TStringBuf TTextReader::GetPrefix() const {
+ return ConstStringOrEmptyResult(xmlTextReaderConstPrefix(Impl.Get()));
+ }
+
+ char TTextReader::GetQuoteChar() const {
+ return CharResult(xmlTextReaderQuoteChar(Impl.Get()));
+ }
+
+ TStringBuf TTextReader::GetValue() const {
+ return ConstStringOrEmptyResult(xmlTextReaderConstValue(Impl.Get()));
+ }
+
+ TTextReader::EReadState TTextReader::GetReadState() const {
+ return static_cast<EReadState>(IntResult(xmlTextReaderReadState(Impl.Get())));
+ }
+
+ void TTextReader::Close() {
+ if (xmlTextReaderClose(Impl.Get()) == -1) {
+ ThrowException();
+ }
+ }
+
+ TString TTextReader::GetAttribute(int number) const {
+ return TempStringResult(xmlTextReaderGetAttributeNo(Impl.Get(), number));
+ }
+
+ TString TTextReader::GetAttribute(TZtStringBuf name) const {
+ return TempStringResult(xmlTextReaderGetAttribute(Impl.Get(), XMLCHAR(name.data())));
+ }
+
+ TString TTextReader::GetAttribute(TZtStringBuf localName, TZtStringBuf nsUri) const {
+ return TempStringResult(xmlTextReaderGetAttributeNs(Impl.Get(), XMLCHAR(localName.data()), XMLCHAR(nsUri.data())));
+ }
+
+ TString TTextReader::LookupNamespace(TZtStringBuf prefix) const {
+ return TempStringResult(xmlTextReaderLookupNamespace(Impl.Get(), XMLCHAR(prefix.data())));
+ }
+
+ bool TTextReader::MoveToAttribute(int number) {
+ return BoolResult(xmlTextReaderMoveToAttributeNo(Impl.Get(), number));
+ }
+
+ bool TTextReader::MoveToAttribute(TZtStringBuf name) {
+ return BoolResult(xmlTextReaderMoveToAttribute(Impl.Get(), XMLCHAR(name.data())));
+ }
+
+ bool TTextReader::MoveToAttribute(TZtStringBuf localName, TZtStringBuf nsUri) {
+ return BoolResult(xmlTextReaderMoveToAttributeNs(Impl.Get(), XMLCHAR(localName.data()), XMLCHAR(nsUri.data())));
+ }
+
+ bool TTextReader::MoveToFirstAttribute() {
+ return BoolResult(xmlTextReaderMoveToFirstAttribute(Impl.Get()));
+ }
+
+ bool TTextReader::MoveToNextAttribute() {
+ return BoolResult(xmlTextReaderMoveToNextAttribute(Impl.Get()));
+ }
+
+ bool TTextReader::MoveToElement() {
+ return BoolResult(xmlTextReaderMoveToElement(Impl.Get()));
+ }
+
+ TConstNode TTextReader::Expand() const {
+ const xmlNodePtr node = xmlTextReaderExpand(Impl.Get());
+ if (node == nullptr) {
+ ThrowException();
+ }
+ return TConstNode(TNode(node->doc, node));
+ }
+
+ bool TTextReader::Next() {
+ return BoolResult(xmlTextReaderNext(Impl.Get()));
+ }
+
+ bool TTextReader::IsValid() const {
+ return BoolResult(xmlTextReaderIsValid(Impl.Get()));
+ }
+
+ // Callback for xmlReaderForIO() to read more data.
+ // It is almost "noexcept" (std::bad_alloc may happen when saving exception message to new TString).
+ // Waiting for std::exception_ptr and std::rethrow_exception from C++11 in Arcadia to make it really "noexcept".
+ int TTextReader::ReadFromInputStreamCallback(void* context, char* buffer, int len) {
+ Y_ASSERT(len >= 0);
+ TTextReader* reader = static_cast<TTextReader*>(context);
+
+ int result = -1;
+
+ // Exception may be thrown by IInputStream::Read().
+ // It is caught unconditionally because exceptions cannot safely pass through libxml2 plain C code
+ // (no destructors, no RAII, raw pointers, so in case of stack unwinding some memory gets leaked).
+
+ try {
+ result = reader->Stream.Read(buffer, len);
+ } catch (const yexception& ex) {
+ reader->LogError() << "read from input stream failed: " << ex;
+ } catch (...) {
+ reader->LogError() << "read from input stream failed";
+ }
+
+ return result;
+ }
+
+ void TTextReader::OnLibxmlError(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator) {
+ TTextReader* reader = static_cast<TTextReader*>(arg);
+ Y_ASSERT(reader != nullptr);
+
+ TStringStream& out = reader->LogError();
+
+ if (severity == XML_PARSER_SEVERITY_ERROR) {
+ out << "libxml parse error";
+ } else if (severity == XML_PARSER_SEVERITY_VALIDITY_ERROR) {
+ out << "libxml validity error";
+ } else {
+ out << "libxml error";
+ }
+
+ if (locator != nullptr) {
+ const int line = xmlTextReaderLocatorLineNumber(locator);
+ const TCharPtr baseUri = xmlTextReaderLocatorBaseURI(locator);
+ out << " (";
+ if (line != -1) {
+ out << "at line " << line;
+ if (baseUri) {
+ out << ", ";
+ }
+ }
+ if (baseUri) {
+ out << "base URI " << CAST2CHAR(baseUri.Get());
+ }
+ out << ")";
+ }
+
+ TStringBuf message = (msg != nullptr) ? msg : "unknown";
+ message = StripStringRight(message); // remove trailing \n that is added by libxml
+ if (!message.empty()) {
+ out << ": " << message;
+ }
+ }
+
+ void TTextReader::SetupErrorHandler() {
+ xmlTextReaderErrorFunc func = nullptr;
+ void* arg = nullptr;
+
+ // We respect any other error handlers already set up:
+ xmlTextReaderGetErrorHandler(Impl.Get(), &func, &arg);
+ if (!func) {
+ func = TTextReader::OnLibxmlError;
+ xmlTextReaderSetErrorHandler(Impl.Get(), func, this);
+ }
+ }
+
+ TStringStream& TTextReader::LogError() const {
+ if (IsError) { // maybe there are previous errors
+ ErrorBuffer << Endl;
+ }
+ IsError = true;
+ return ErrorBuffer;
+ }
+
+ void TTextReader::CheckForExceptions() const {
+ if (Y_LIKELY(!IsError)) {
+ return;
+ }
+
+ const TString message = ErrorBuffer.Str();
+ ErrorBuffer.clear();
+ IsError = false;
+
+ ythrow yexception() << message;
+ }
+
+ void TTextReader::ThrowException() const {
+ CheckForExceptions();
+ // Probably CheckForExceptions() would throw an exception with more verbose message. As the last resort
+ // (we do not even know the name of the failed libxml function, but it's possible to deduce it from stacktrace):
+ ythrow yexception() << "libxml function returned error exit code";
+ }
+
+ bool TTextReader::BoolResult(int value) const {
+ if (Y_UNLIKELY(value == -1)) {
+ ThrowException();
+ }
+ return (value != 0);
+ }
+
+ int TTextReader::IntResult(int value) const {
+ if (Y_UNLIKELY(value == -1)) {
+ ThrowException();
+ }
+ return value;
+ }
+
+ char TTextReader::CharResult(int value) const {
+ if (Y_UNLIKELY(value == -1)) {
+ ThrowException();
+ }
+ return static_cast<char>(value);
+ }
+
+ TStringBuf TTextReader::ConstStringResult(const xmlChar* value) const {
+ if (Y_UNLIKELY(value == nullptr)) {
+ ThrowException();
+ }
+ return CAST2CHAR(value);
+ }
+
+ TStringBuf TTextReader::ConstStringOrEmptyResult(const xmlChar* value) const {
+ CheckForExceptions();
+ return (value != nullptr) ? TStringBuf(CAST2CHAR(value)) : TStringBuf();
+ }
+
+ TString TTextReader::TempStringResult(TCharPtr value) const {
+ if (Y_UNLIKELY(value == nullptr)) {
+ ThrowException();
+ }
+ return TString(CAST2CHAR(value.Get()));
+ }
+
+ TString TTextReader::TempStringOrEmptyResult(TCharPtr value) const {
+ CheckForExceptions();
+ return (value != nullptr) ? TString(CAST2CHAR(value.Get())) : TString();
+ }
+
+ struct TTextReader::TDeleter {
+ static inline void Destroy(xmlTextReaderPtr handle) {
+ xmlFreeTextReader(handle);
+ }
+ };
+}