#include "xml-textreader.h"

#include <contrib/libs/libxml/include/libxml/xmlreader.h>

#include <util/generic/yexception.h>
#include <util/string/strip.h>
#include <util/system/compiler.h>

#include <exception>

namespace NXml {
    // Creates a libxml2 text reader that pulls its data from `stream` through
    // ReadFromInputStreamCallback. Throws yexception if libxml cannot allocate the reader
    // or if an error was logged while setting it up.
    TTextReader::TTextReader(IInputStream& stream, const TOptions& options)
        : Stream(stream)
        , IsError(false)
    {
        // No close callback, no URL, no explicit encoding; `this` is the I/O context handed back to the read callback.
        Impl.Reset(xmlReaderForIO(ReadFromInputStreamCallback, nullptr, this, nullptr, nullptr, options.GetMask()));

        if (!Impl) {
            ythrow yexception() << "cannot instantiate underlying xmlTextReader structure";
        }

        SetupErrorHandler();
        CheckForExceptions();
    }

    TTextReader::~TTextReader() {
    }

    // ---- Reading -------------------------------------------------------------------------------
    // Every wrapper below forwards to the corresponding libxml call and converts the result with
    // one of the *Result() helpers defined near the end of this file, which throw yexception on failure.

    bool TTextReader::Read() {
        return BoolResult(xmlTextReaderRead(Impl.Get()));
    }

    TString TTextReader::ReadInnerXml() const {
        return TempStringOrEmptyResult(xmlTextReaderReadInnerXml(Impl.Get()));
    }

    TString TTextReader::ReadOuterXml() const {
        return TempStringOrEmptyResult(xmlTextReaderReadOuterXml(Impl.Get()));
    }

    TString TTextReader::ReadString() const {
        return TempStringOrEmptyResult(xmlTextReaderReadString(Impl.Get()));
    }

    bool TTextReader::ReadAttributeValue() const {
        return BoolResult(xmlTextReaderReadAttributeValue(Impl.Get()));
    }

    // ---- Properties of the current node --------------------------------------------------------

    int TTextReader::GetAttributeCount() const {
        return IntResult(xmlTextReaderAttributeCount(Impl.Get()));
    }

    TStringBuf TTextReader::GetBaseUri() const {
        return ConstStringOrEmptyResult(xmlTextReaderConstBaseUri(Impl.Get()));
    }

    int TTextReader::GetDepth() const {
        return IntResult(xmlTextReaderDepth(Impl.Get()));
    }

    bool TTextReader::HasAttributes() const {
        return BoolResult(xmlTextReaderHasAttributes(Impl.Get()));
    }

    bool TTextReader::HasValue() const {
        return BoolResult(xmlTextReaderHasValue(Impl.Get()));
    }

    bool TTextReader::IsDefault() const {
        return BoolResult(xmlTextReaderIsDefault(Impl.Get()));
    }

    bool TTextReader::IsEmptyElement() const {
        return BoolResult(xmlTextReaderIsEmptyElement(Impl.Get()));
    }

    TStringBuf TTextReader::GetLocalName() const {
        return ConstStringOrEmptyResult(xmlTextReaderConstLocalName(Impl.Get()));
    }

    TStringBuf TTextReader::GetName() const {
        return ConstStringOrEmptyResult(xmlTextReaderConstName(Impl.Get()));
    }

    TStringBuf TTextReader::GetNamespaceUri() const {
        return ConstStringOrEmptyResult(xmlTextReaderConstNamespaceUri(Impl.Get()));
    }

    TTextReader::ENodeType TTextReader::GetNodeType() const {
        return static_cast<ENodeType>(IntResult(xmlTextReaderNodeType(Impl.Get())));
    }

    TStringBuf TTextReader::GetPrefix() const {
        return ConstStringOrEmptyResult(xmlTextReaderConstPrefix(Impl.Get()));
    }

    char TTextReader::GetQuoteChar() const {
        return CharResult(xmlTextReaderQuoteChar(Impl.Get()));
    }

    TStringBuf TTextReader::GetValue() const {
        return ConstStringOrEmptyResult(xmlTextReaderConstValue(Impl.Get()));
    }

    TTextReader::EReadState TTextReader::GetReadState() const {
        return static_cast<EReadState>(IntResult(xmlTextReaderReadState(Impl.Get())));
    }

    // Closes the underlying reader; throws yexception if libxml reports -1.
    void TTextReader::Close() {
        if (xmlTextReaderClose(Impl.Get()) == -1) {
            ThrowException();
        }
    }

    // ---- Attribute access ----------------------------------------------------------------------
    // The GetAttribute()/LookupNamespace() overloads go through TempStringResult(), so a nullptr
    // from libxml results in an exception rather than an empty string.

    TString TTextReader::GetAttribute(int number) const {
        return TempStringResult(xmlTextReaderGetAttributeNo(Impl.Get(), number));
    }

    TString TTextReader::GetAttribute(TZtStringBuf name) const {
        return TempStringResult(xmlTextReaderGetAttribute(Impl.Get(), XMLCHAR(name.data())));
    }

    TString TTextReader::GetAttribute(TZtStringBuf localName, TZtStringBuf nsUri) const {
        return TempStringResult(xmlTextReaderGetAttributeNs(Impl.Get(), XMLCHAR(localName.data()), XMLCHAR(nsUri.data())));
    }

    TString TTextReader::LookupNamespace(TZtStringBuf prefix) const {
        return TempStringResult(xmlTextReaderLookupNamespace(Impl.Get(), XMLCHAR(prefix.data())));
    }

    // ---- Cursor movement -----------------------------------------------------------------------

    bool TTextReader::MoveToAttribute(int number) {
        return BoolResult(xmlTextReaderMoveToAttributeNo(Impl.Get(), number));
    }

    bool TTextReader::MoveToAttribute(TZtStringBuf name) {
        return BoolResult(xmlTextReaderMoveToAttribute(Impl.Get(), XMLCHAR(name.data())));
    }

    bool TTextReader::MoveToAttribute(TZtStringBuf localName, TZtStringBuf nsUri) {
        return BoolResult(xmlTextReaderMoveToAttributeNs(Impl.Get(), XMLCHAR(localName.data()), XMLCHAR(nsUri.data())));
    }

    bool TTextReader::MoveToFirstAttribute() {
        return BoolResult(xmlTextReaderMoveToFirstAttribute(Impl.Get()));
    }

    bool TTextReader::MoveToNextAttribute() {
        return BoolResult(xmlTextReaderMoveToNextAttribute(Impl.Get()));
    }

    bool TTextReader::MoveToElement() {
        return BoolResult(xmlTextReaderMoveToElement(Impl.Get()));
    }

    // Expands the current node via xmlTextReaderExpand() and wraps it (together with its document)
    // into a TConstNode. Throws yexception if libxml returns nullptr.
    TConstNode TTextReader::Expand() const {
        const xmlNodePtr node = xmlTextReaderExpand(Impl.Get());
        if (node == nullptr) {
            ThrowException();
        }
        return TConstNode(TNode(node->doc, node));
    }

    bool TTextReader::Next() {
        return BoolResult(xmlTextReaderNext(Impl.Get()));
    }

    bool TTextReader::IsValid() const {
        return BoolResult(xmlTextReaderIsValid(Impl.Get()));
    }

    // Callback for xmlReaderForIO() to read more data.
    // It is almost "noexcept" (std::bad_alloc may happen when saving exception message to new TString).
    // Waiting for std::exception_ptr and std::rethrow_exception from C++11 in Arcadia to make it really "noexcept".
    //
    // `context` is the TTextReader passed to xmlReaderForIO(). Returns the number of bytes placed
    // into `buffer`, or -1 if reading from the stream threw.
    int TTextReader::ReadFromInputStreamCallback(void* context, char* buffer, int len) {
        Y_ASSERT(len >= 0);
        TTextReader* reader = static_cast<TTextReader*>(context);

        int result = -1;

        // Exception may be thrown by IInputStream::Read().
        // It is caught unconditionally because exceptions cannot safely pass through libxml2 plain C code
        // (no destructors, no RAII, raw pointers, so in case of stack unwinding some memory gets leaked).

        try {
            // Read() cannot return more than `len`, so the conversion back to int does not lose data.
            result = static_cast<int>(reader->Stream.Read(buffer, len));
        } catch (const yexception& ex) {
            reader->LogError() << "read from input stream failed: " << ex;
        } catch (const std::exception& ex) {
            // Keep the diagnostic text of non-yexception standard exceptions instead of dropping it.
            reader->LogError() << "read from input stream failed: " << ex.what();
        } catch (...) {
            reader->LogError() << "read from input stream failed";
        }

        return result;
    }

    // Error handler registered with libxml by SetupErrorHandler(). `arg` is the owning TTextReader.
    // Appends one entry to the reader's error buffer in the form
    //   "<kind> (at line N, base URI U): <message>"
    // where the parenthesised part is present only when a locator is supplied.
    void TTextReader::OnLibxmlError(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator) {
        TTextReader* reader = static_cast<TTextReader*>(arg);
        Y_ASSERT(reader != nullptr);

        TStringStream& out = reader->LogError();

        if (severity == XML_PARSER_SEVERITY_ERROR) {
            out << "libxml parse error";
        } else if (severity == XML_PARSER_SEVERITY_VALIDITY_ERROR) {
            out << "libxml validity error";
        } else {
            out << "libxml error";
        }

        if (locator != nullptr) {
            const int line = xmlTextReaderLocatorLineNumber(locator);
            const TCharPtr baseUri = xmlTextReaderLocatorBaseURI(locator);
            out << " (";
            if (line != -1) {
                out << "at line " << line;
                if (baseUri) {
                    out << ", ";
                }
            }
            if (baseUri) {
                out << "base URI " << CAST2CHAR(baseUri.Get());
            }
            out << ")";
        }

        TStringBuf message = (msg != nullptr) ? msg : "unknown";
        message = StripStringRight(message); // remove trailing \n that is added by libxml
        if (!message.empty()) {
            out << ": " << message;
        }
    }

    // Installs OnLibxmlError as the reader's error handler unless one is already registered.
    void TTextReader::SetupErrorHandler() {
        xmlTextReaderErrorFunc func = nullptr;
        void* arg = nullptr;

        // We respect any other error handlers already set up:
        xmlTextReaderGetErrorHandler(Impl.Get(), &func, &arg);
        if (!func) {
            func = TTextReader::OnLibxmlError;
            xmlTextReaderSetErrorHandler(Impl.Get(), func, this);
        }
    }

    // Marks the reader as failed and returns the stream to append the error text to.
    // Consecutive errors are separated by a newline.
    TStringStream& TTextReader::LogError() const {
        if (IsError) { // maybe there are previous errors
            ErrorBuffer << Endl;
        }
        IsError = true;
        return ErrorBuffer;
    }

    // Throws yexception carrying everything accumulated by LogError(), if anything;
    // the buffer and the error flag are reset before throwing.
    void TTextReader::CheckForExceptions() const {
        if (Y_LIKELY(!IsError)) {
            return;
        }

        const TString message = ErrorBuffer.Str();
        ErrorBuffer.clear();
        IsError = false;

        ythrow yexception() << message;
    }

    // Always throws: either the logged errors or, failing that, a generic message.
    void TTextReader::ThrowException() const {
        CheckForExceptions();
        // Probably CheckForExceptions() would throw an exception with more verbose message. As the last resort
        // (we do not even know the name of the failed libxml function, but it's possible to deduce it from stacktrace):
        ythrow yexception() << "libxml function returned error exit code";
    }

    // ---- Result conversion helpers -------------------------------------------------------------

    // -1 means failure (throws); 0 maps to false, any other value to true.
    bool TTextReader::BoolResult(int value) const {
        if (Y_UNLIKELY(value == -1)) {
            ThrowException();
        }
        return (value != 0);
    }

    // -1 means failure (throws); any other value is returned unchanged.
    int TTextReader::IntResult(int value) const {
        if (Y_UNLIKELY(value == -1)) {
            ThrowException();
        }
        return value;
    }

    // -1 means failure (throws); any other value is narrowed to char.
    char TTextReader::CharResult(int value) const {
        if (Y_UNLIKELY(value == -1)) {
            ThrowException();
        }
        return static_cast<char>(value);
    }

    // nullptr means failure (throws); otherwise returns a view over the libxml-owned string.
    TStringBuf TTextReader::ConstStringResult(const xmlChar* value) const {
        if (Y_UNLIKELY(value == nullptr)) {
            ThrowException();
        }
        return CAST2CHAR(value);
    }

    // nullptr is not an error by itself: throws only if an error has been logged, otherwise yields an empty view.
    TStringBuf TTextReader::ConstStringOrEmptyResult(const xmlChar* value) const {
        CheckForExceptions();
        return (value != nullptr) ? TStringBuf(CAST2CHAR(value)) : TStringBuf();
    }

    // nullptr means failure (throws); otherwise copies the string held by `value` into a TString.
    TString TTextReader::TempStringResult(TCharPtr value) const {
        if (Y_UNLIKELY(value == nullptr)) {
            ThrowException();
        }
        return TString(CAST2CHAR(value.Get()));
    }

    // nullptr is not an error by itself: throws only if an error has been logged, otherwise yields an empty TString.
    TString TTextReader::TempStringOrEmptyResult(TCharPtr value) const {
        CheckForExceptions();
        return (value != nullptr) ? TString(CAST2CHAR(value.Get())) : TString();
    }

    // Deleter policy that releases an xmlTextReaderPtr with xmlFreeTextReader().
    // NOTE(review): presumably the deleter of the Impl holder declared in the header - confirm there.
    struct TTextReader::TDeleter {
        static inline void Destroy(xmlTextReaderPtr handle) {
            xmlFreeTextReader(handle);
        }
    };
}