diff options
author | sobols <sobols@yandex-team.ru> | 2022-02-10 16:47:08 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:08 +0300 |
commit | 09961b69c61f471ddd594e0fd877df62a8021562 (patch) | |
tree | 54a7b60a9526a7104557a033eb0a8d70d64b604c /library/cpp/xml | |
parent | 4ce8835206f981afa4a61915a49a21fb750416ec (diff) | |
download | ydb-09961b69c61f471ddd594e0fd877df62a8021562.tar.gz |
Restoring authorship annotation for <sobols@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/xml')
-rw-r--r-- | library/cpp/xml/document/libxml-guards.h | 2 | ||||
-rw-r--r-- | library/cpp/xml/document/ut/ya.make | 2 | ||||
-rw-r--r-- | library/cpp/xml/document/xml-document-decl.h | 202 | ||||
-rw-r--r-- | library/cpp/xml/document/xml-document.cpp | 120 | ||||
-rw-r--r-- | library/cpp/xml/document/xml-document_ut.cpp | 172 | ||||
-rw-r--r-- | library/cpp/xml/document/xml-textreader.cpp | 572 | ||||
-rw-r--r-- | library/cpp/xml/document/xml-textreader.h | 612 | ||||
-rw-r--r-- | library/cpp/xml/document/xml-textreader_ut.cpp | 422 | ||||
-rw-r--r-- | library/cpp/xml/document/ya.make | 2 |
9 files changed, 1053 insertions, 1053 deletions
diff --git a/library/cpp/xml/document/libxml-guards.h b/library/cpp/xml/document/libxml-guards.h index 4188cecff1..479ce285e5 100644 --- a/library/cpp/xml/document/libxml-guards.h +++ b/library/cpp/xml/document/libxml-guards.h @@ -40,7 +40,7 @@ namespace NXml { typedef TxmlXPathObjectPtr TXPathObjectPtr; typedef TAutoPtr<char, NDetail::TSignedCharPtrTraits> TSignedCharPtr; typedef TAutoPtr<xmlChar, NDetail::TCharPtrTraits> TCharPtr; - typedef TxmlDocHolder TDocHolder; + typedef TxmlDocHolder TDocHolder; typedef TxmlURIPtr TURIPtr; typedef TxmlNodePtr TNodePtr; typedef TAutoPtr<xmlOutputBuffer, NDetail::TOutputBufferPtrTraits> TOutputBufferPtr; diff --git a/library/cpp/xml/document/ut/ya.make b/library/cpp/xml/document/ut/ya.make index e955448c66..9a7213baa0 100644 --- a/library/cpp/xml/document/ut/ya.make +++ b/library/cpp/xml/document/ut/ya.make @@ -4,7 +4,7 @@ OWNER(finder) SRCS( xml-document_ut.cpp - xml-textreader_ut.cpp + xml-textreader_ut.cpp xml-options_ut.cpp ) diff --git a/library/cpp/xml/document/xml-document-decl.h b/library/cpp/xml/document/xml-document-decl.h index bfda1fb7e6..cde87a311b 100644 --- a/library/cpp/xml/document/xml-document-decl.h +++ b/library/cpp/xml/document/xml-document-decl.h @@ -15,8 +15,8 @@ namespace NXml { class TConstNodes; class TConstNode; - using TXPathContext = xmlXPathContext; - + using TXPathContext = xmlXPathContext; + class TDocument { public: enum Source { @@ -35,11 +35,11 @@ namespace NXml { TDocument(const TString& source, Source type = File); public: - TDocument(const TDocument& that) = delete; - TDocument& operator=(const TDocument& that) = delete; + TDocument(const TDocument& that) = delete; + TDocument& operator=(const TDocument& that) = delete; - TDocument(TDocument&& that); - TDocument& operator=(TDocument&& that); + TDocument(TDocument&& that); + TDocument& operator=(TDocument&& that); /** * get root element @@ -74,12 +74,12 @@ namespace NXml { void ParseFile(const TString& file); void ParseString(TZtStringBuf xml); - TDocument(TDocHolder doc) - : Doc(std::move(doc)) + TDocument(TDocHolder doc) + : Doc(std::move(doc)) { } - TDocHolder Doc; + TDocHolder Doc; }; struct TNamespaceForXPath { @@ -150,7 +150,7 @@ namespace NXml { friend class TConstNode; friend class TNode; - TConstNodes(xmlDoc* doc, TXPathObjectPtr obj); + TConstNodes(xmlDoc* doc, TXPathObjectPtr obj); size_t SizeValue; xmlDoc* Doc; @@ -161,7 +161,7 @@ namespace NXml { public: friend class TDocument; friend class TConstNode; - friend class TTextReader; + friend class TTextReader; /** * check if node is null @@ -174,18 +174,18 @@ namespace NXml { bool IsElementNode() const; /** - * Create xpath context to be used later for fast xpath evaluation. - * @param nss: explicitly specify XML namespaces to use and their prefixes - * - * For better performance, when you need to evaluate several xpath expressions, - * it makes sense to create a context, load namespace prefixes once - * and use the context several times in Node(), Nodes(), XPath() function calls for several nodes. - * The context may be used with any node of the current document, but - * cannot be shared between different XML documents. - */ - TXPathContextPtr CreateXPathContext(const TNamespacesForXPath& nss = TNamespacesForXPath()) const; - - /** + * Create xpath context to be used later for fast xpath evaluation. + * @param nss: explicitly specify XML namespaces to use and their prefixes + * + * For better performance, when you need to evaluate several xpath expressions, + * it makes sense to create a context, load namespace prefixes once + * and use the context several times in Node(), Nodes(), XPath() function calls for several nodes. + * The context may be used with any node of the current document, but + * cannot be shared between different XML documents. + */ + TXPathContextPtr CreateXPathContext(const TNamespacesForXPath& nss = TNamespacesForXPath()) const; + + /** * get all element nodes matching given xpath expression * @param xpath: xpath expression * @param quiet: don't throw exception if zero nodes found @@ -197,17 +197,17 @@ namespace NXml { TConstNodes Nodes(TZtStringBuf xpath, bool quiet = false, const TNamespacesForXPath& ns = TNamespacesForXPath()) const; /** - * get all element nodes matching given xpath expression - * @param xpath: xpath expression - * @param quiet: don't throw exception if zero nodes found - * @param ctxt: reusable xpath context - * - * For historical reasons, this only works for *element* nodes. - * Use the XPath function if you need other kinds of nodes. - */ + * get all element nodes matching given xpath expression + * @param xpath: xpath expression + * @param quiet: don't throw exception if zero nodes found + * @param ctxt: reusable xpath context + * + * For historical reasons, this only works for *element* nodes. + * Use the XPath function if you need other kinds of nodes. + */ TConstNodes Nodes(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const; - - /** + + /** * get all nodes matching given xpath expression * @param xpath: xpath expression * @param quiet: don't throw exception if zero nodes found @@ -216,18 +216,18 @@ namespace NXml { TConstNodes XPath(TZtStringBuf xpath, bool quiet = false, const TNamespacesForXPath& ns = TNamespacesForXPath()) const; /** - * get all nodes matching given xpath expression - * @param xpath: xpath expression - * @param quiet: don't throw exception if zero nodes found - * @param ctxt: reusable xpath context - */ + * get all nodes matching given xpath expression + * @param xpath: xpath expression + * @param quiet: don't throw exception if zero nodes found + * @param ctxt: reusable xpath context + */ TConstNodes XPath(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const; - - /** + + /** * get the first element node matching given xpath expression * @param xpath: path to node (from current node) * @param quiet: don't throw exception if node not found, - * return null node (@see IsNull()) + * return null node (@see IsNull()) * @param ns: explicitly specify XML namespaces to use and their prefixes * * For historical reasons, this only works for *element* nodes. @@ -238,23 +238,23 @@ namespace NXml { TConstNode Node(TZtStringBuf xpath, bool quiet = false, const TNamespacesForXPath& ns = TNamespacesForXPath()) const; /** - * get the first element node matching given xpath expression - * @param xpath: path to node (from current node) - * @param quiet: don't throw exception if node not found, - * return null node (@see IsNull()) - * @param ctxt: reusable xpath context - * - * For historical reasons, this only works for *element* nodes. - * Use the XPath function if you need other kinds of nodes. - */ + * get the first element node matching given xpath expression + * @param xpath: path to node (from current node) + * @param quiet: don't throw exception if node not found, + * return null node (@see IsNull()) + * @param ctxt: reusable xpath context + * + * For historical reasons, this only works for *element* nodes. + * Use the XPath function if you need other kinds of nodes. + */ TNode Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt); TConstNode Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const; - - /** + + /** * get node first child * @param name: child name - * @note if name is empty, returns the first child node of type "element" - * @note returns null node if no child found + * @note if name is empty, returns the first child node of type "element" + * @note returns null node if no child found */ TNode FirstChild(TZtStringBuf name); TConstNode FirstChild(TZtStringBuf name) const; @@ -272,8 +272,8 @@ namespace NXml { /** * get node neighbour * @param name: neighbour name - * @note if name is empty, returns the next sibling node of type "element" - * @node returns null node if no neighbour found + * @note if name is empty, returns the next sibling node of type "element" + * @node returns null node if no neighbour found */ TNode NextSibling(TZtStringBuf name); TConstNode NextSibling(TZtStringBuf name) const; @@ -495,18 +495,18 @@ namespace NXml { } /** - * Create xpath context to be used later for fast xpath evaluation. - * @param nss: explicitly specify XML namespaces to use and their prefixes - */ + * Create xpath context to be used later for fast xpath evaluation. + * @param nss: explicitly specify XML namespaces to use and their prefixes + */ TXPathContextPtr CreateXPathContext(const TNamespacesForXPath& nss = TNamespacesForXPath()) const { - return ActualNode.CreateXPathContext(nss); - } - - /** + return ActualNode.CreateXPathContext(nss); + } + + /** * get all element nodes matching given xpath expression * @param xpath: xpath expression * @param quiet: don't throw exception if zero nodes found - * @param ns: explicitly specify XML namespaces to use and their prefixes + * @param ns: explicitly specify XML namespaces to use and their prefixes * * For historical reasons, this only works for *element* nodes. * Use the XPath function if you need other kinds of nodes. @@ -516,44 +516,44 @@ namespace NXml { } /** - * get all element nodes matching given xpath expression - * @param xpath: xpath expression - * @param quiet: don't throw exception if zero nodes found - * @param ctxt: reusable xpath context - * - * For historical reasons, this only works for *element* nodes. - * Use the XPath function if you need other kinds of nodes. - */ + * get all element nodes matching given xpath expression + * @param xpath: xpath expression + * @param quiet: don't throw exception if zero nodes found + * @param ctxt: reusable xpath context + * + * For historical reasons, this only works for *element* nodes. + * Use the XPath function if you need other kinds of nodes. + */ TConstNodes Nodes(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const { - return ActualNode.Nodes(xpath, quiet, ctxt); - } - - /** + return ActualNode.Nodes(xpath, quiet, ctxt); + } + + /** * get all nodes matching given xpath expression * @param xpath: xpath expression * @param quiet: don't throw exception if zero nodes found - * @param ns: explicitly specify XML namespaces to use and their prefixes + * @param ns: explicitly specify XML namespaces to use and their prefixes */ TConstNodes XPath(TZtStringBuf xpath, bool quiet = false, const TNamespacesForXPath& ns = TNamespacesForXPath()) const { return ActualNode.XPath(xpath, quiet, ns); } /** - * get all nodes matching given xpath expression - * @param xpath: xpath expression - * @param quiet: don't throw exception if zero nodes found - * @param ctxt: reusable xpath context - */ + * get all nodes matching given xpath expression + * @param xpath: xpath expression + * @param quiet: don't throw exception if zero nodes found + * @param ctxt: reusable xpath context + */ TConstNodes XPath(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const { - return ActualNode.XPath(xpath, quiet, ctxt); - } - - /** + return ActualNode.XPath(xpath, quiet, ctxt); + } + + /** * get the first element node matching given xpath expression * @param xpath: path to node (from current node) * @param quiet: don't throw exception if node not found, - * return null node (@see IsNull()) - * @param ns: explicitly specify XML namespaces to use and their prefixes + * return null node (@see IsNull()) + * @param ns: explicitly specify XML namespaces to use and their prefixes * * For historical reasons, this only works for *element* nodes. * Use the XPath function if you need other kinds of nodes. @@ -562,20 +562,20 @@ namespace NXml { return ActualNode.Node(xpath, quiet, ns); } - /** - * get the first element node matching given xpath expression - * @param xpath: path to node (from current node) - * @param quiet: don't throw exception if node not found, - * return null node (@see IsNull()) - * @param ctxt: reusable xpath context - * - * For historical reasons, this only works for *element* nodes. - * Use the XPath function if you need other kinds of nodes. - */ + /** + * get the first element node matching given xpath expression + * @param xpath: path to node (from current node) + * @param quiet: don't throw exception if node not found, + * return null node (@see IsNull()) + * @param ctxt: reusable xpath context + * + * For historical reasons, this only works for *element* nodes. + * Use the XPath function if you need other kinds of nodes. + */ TConstNode Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const { - return ActualNode.Node(xpath, quiet, ctxt); - } - + return ActualNode.Node(xpath, quiet, ctxt); + } + TConstNode FirstChild(TZtStringBuf name) const { return ActualNode.FirstChild(name); } diff --git a/library/cpp/xml/document/xml-document.cpp b/library/cpp/xml/document/xml-document.cpp index 18a554d732..6907217a14 100644 --- a/library/cpp/xml/document/xml-document.cpp +++ b/library/cpp/xml/document/xml-document.cpp @@ -26,7 +26,7 @@ namespace NXml { ParseString(xml); break; case RootName: { - TDocHolder doc(xmlNewDoc(XMLCHAR("1.0"))); + TDocHolder doc(xmlNewDoc(XMLCHAR("1.0"))); if (!doc) THROW(XmlException, "Can't create xml document."); doc->encoding = xmlStrdup(XMLCHAR("utf-8")); @@ -36,21 +36,21 @@ namespace NXml { THROW(XmlException, "Can't create root node."); xmlDocSetRootElement(doc.Get(), node.Get()); Y_UNUSED(node.Release()); - Doc = std::move(doc); + Doc = std::move(doc); } break; default: THROW(InvalidArgument, "Wrong source type"); } } - TDocument::TDocument(TDocument&& doc) - : Doc(std::move(doc.Doc)) + TDocument::TDocument(TDocument&& doc) + : Doc(std::move(doc.Doc)) { } - TDocument& TDocument::operator=(TDocument&& doc) { + TDocument& TDocument::operator=(TDocument&& doc) { if (this != &doc) - doc.Swap(*this); + doc.Swap(*this); return *this; } @@ -63,7 +63,7 @@ namespace NXml { if (!pctx) THROW(XmlException, "Can't create parser context"); - TDocHolder doc(xmlCtxtReadFile(pctx.Get(), file.c_str(), nullptr, XML_PARSE_NOCDATA)); + TDocHolder doc(xmlCtxtReadFile(pctx.Get(), file.c_str(), nullptr, XML_PARSE_NOCDATA)); if (!doc) THROW(XmlException, "Can't parse file " << file); @@ -72,7 +72,7 @@ namespace NXml { if (res == -1) THROW(XmlException, "XIncludes processing failed"); - Doc = std::move(doc); + Doc = std::move(doc); } void TDocument::ParseString(TZtStringBuf xml) { @@ -80,12 +80,12 @@ namespace NXml { if (pctx.Get() == nullptr) THROW(XmlException, "Can't create parser context"); - TDocHolder doc(xmlCtxtReadMemory(pctx.Get(), xml.c_str(), (int)xml.size(), nullptr, nullptr, XML_PARSE_NOCDATA)); + TDocHolder doc(xmlCtxtReadMemory(pctx.Get(), xml.c_str(), (int)xml.size(), nullptr, nullptr, XML_PARSE_NOCDATA)); if (!doc) THROW(XmlException, "Can't parse string"); - Doc = std::move(doc); + Doc = std::move(doc); } TNode TDocument::Root() { @@ -113,34 +113,34 @@ namespace NXml { } TXPathContextPtr TNode::CreateXPathContext(const TNamespacesForXPath& nss) const { - TXPathContextPtr ctx = xmlXPathNewContext(DocPointer); - if (!ctx) - THROW(XmlException, "Can't create empty xpath context"); - - for (const auto& ns : nss) { - const int r = xmlXPathRegisterNs(ctx.Get(), XMLCHAR(ns.Prefix.c_str()), XMLCHAR(ns.Url.c_str())); - if (r != 0) - THROW(XmlException, "Can't register namespace " << ns.Url << " with prefix " << ns.Prefix); - } - - return ctx; - } - + TXPathContextPtr ctx = xmlXPathNewContext(DocPointer); + if (!ctx) + THROW(XmlException, "Can't create empty xpath context"); + + for (const auto& ns : nss) { + const int r = xmlXPathRegisterNs(ctx.Get(), XMLCHAR(ns.Prefix.c_str()), XMLCHAR(ns.Url.c_str())); + if (r != 0) + THROW(XmlException, "Can't register namespace " << ns.Url << " with prefix " << ns.Prefix); + } + + return ctx; + } + TConstNodes TNode::XPath(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) const { - TXPathContextPtr ctxt = CreateXPathContext(ns); - return XPath(xpath, quiet, *ctxt); - } + TXPathContextPtr ctxt = CreateXPathContext(ns); + return XPath(xpath, quiet, *ctxt); + } TConstNodes TNode::XPath(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const { - if (xmlXPathSetContextNode(NodePointer, &ctxt) != 0) - THROW(XmlException, "Can't set xpath context node, probably the context is associated with another document"); - - TXPathObjectPtr obj = xmlXPathEvalExpression(XMLCHAR(xpath.c_str()), &ctxt); - if (!obj) - THROW(XmlException, "Can't evaluate xpath expression " << xpath); - - TConstNodes nodes(DocPointer, obj); - + if (xmlXPathSetContextNode(NodePointer, &ctxt) != 0) + THROW(XmlException, "Can't set xpath context node, probably the context is associated with another document"); + + TXPathObjectPtr obj = xmlXPathEvalExpression(XMLCHAR(xpath.c_str()), &ctxt); + if (!obj) + THROW(XmlException, "Can't evaluate xpath expression " << xpath); + + TConstNodes nodes(DocPointer, obj); + if (nodes.Size() == 0 && !quiet) THROW(NodeNotFound, xpath); @@ -148,30 +148,30 @@ namespace NXml { } TConstNodes TNode::Nodes(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) const { - TXPathContextPtr ctxt = CreateXPathContext(ns); - return Nodes(xpath, quiet, *ctxt); - } - + TXPathContextPtr ctxt = CreateXPathContext(ns); + return Nodes(xpath, quiet, *ctxt); + } + TConstNodes TNode::Nodes(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const { - TConstNodes nodes = XPath(xpath, quiet, ctxt); + TConstNodes nodes = XPath(xpath, quiet, ctxt); if (nodes.Size() != 0 && !nodes[0].IsElementNode()) THROW(XmlException, "xpath points to non-element nodes: " << xpath); return nodes; } TNode TNode::Node(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) { - TXPathContextPtr ctxt = CreateXPathContext(ns); - return Node(xpath, quiet, *ctxt); - } + TXPathContextPtr ctxt = CreateXPathContext(ns); + return Node(xpath, quiet, *ctxt); + } TConstNode TNode::Node(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) const { - TXPathContextPtr ctxt = CreateXPathContext(ns); - return Node(xpath, quiet, *ctxt); - } - + TXPathContextPtr ctxt = CreateXPathContext(ns); + return Node(xpath, quiet, *ctxt); + } + TNode TNode::Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) { - TConstNodes n = Nodes(xpath, quiet, ctxt); - + TConstNodes n = Nodes(xpath, quiet, ctxt); + if (n.Size() == 0 && !quiet) THROW(NodeNotFound, xpath); @@ -182,7 +182,7 @@ namespace NXml { } TConstNode TNode::Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const { - return const_cast<TNode*>(this)->Node(xpath, quiet, ctxt); + return const_cast<TNode*>(this)->Node(xpath, quiet, ctxt); } TNode TNode::FirstChild(TZtStringBuf name) { @@ -271,9 +271,9 @@ namespace NXml { } TString TNode::Name() const { - if (IsNull()) - THROW(XmlException, "Node is null"); - + if (IsNull()) + THROW(XmlException, "Node is null"); + return CAST2CHAR(NodePointer->name); } @@ -294,9 +294,9 @@ namespace NXml { } bool TNode::IsText() const { - if (IsNull()) - THROW(XmlException, "Node is null"); - + if (IsNull()) + THROW(XmlException, "Node is null"); + return NodePointer->type == XML_TEXT_NODE; } @@ -368,10 +368,10 @@ namespace NXml { return TConstNodesRef(*this); } - TConstNodes::TConstNodes(xmlDoc* doc, TXPathObjectPtr obj) - : SizeValue(obj && obj->nodesetval ? obj->nodesetval->nodeNr : 0) - , Doc(doc) - , Obj(obj) + TConstNodes::TConstNodes(xmlDoc* doc, TXPathObjectPtr obj) + : SizeValue(obj && obj->nodesetval ? obj->nodesetval->nodeNr : 0) + , Doc(doc) + , Obj(obj) { } diff --git a/library/cpp/xml/document/xml-document_ut.cpp b/library/cpp/xml/document/xml-document_ut.cpp index 9f537b75c4..8361e3c503 100644 --- a/library/cpp/xml/document/xml-document_ut.cpp +++ b/library/cpp/xml/document/xml-document_ut.cpp @@ -84,11 +84,11 @@ Y_UNIT_TEST_SUITE(TestXmlDocument) { TConstNode text = root.Node("h:text", false, nss); UNIT_ASSERT_EQUAL(text.Value<TString>(), "Некоторый текст"); - - // For performance you can create xpath context once using nss and pass it. - TXPathContextPtr ctxt = root.CreateXPathContext(nss); - UNIT_ASSERT(root.Node("text", true, *ctxt).IsNull()); - UNIT_ASSERT_EXCEPTION(root.Node("text", false, *ctxt), yexception); + + // For performance you can create xpath context once using nss and pass it. + TXPathContextPtr ctxt = root.CreateXPathContext(nss); + UNIT_ASSERT(root.Node("text", true, *ctxt).IsNull()); + UNIT_ASSERT_EXCEPTION(root.Node("text", false, *ctxt), yexception); UNIT_ASSERT_EQUAL(root.Node("h:text", false, *ctxt).Value<TString>(), "Некоторый текст"); } Y_UNIT_TEST(XmlNodes) { @@ -119,37 +119,37 @@ Y_UNIT_TEST_SUITE(TestXmlDocument) { iterLog << node2.Name() << ';'; } UNIT_ASSERT_STRINGS_EQUAL(iterLog.Str(), "a;c;"); - - // get only element nodes, ignore text nodes with empty "name" param + + // get only element nodes, ignore text nodes with empty "name" param node = root.FirstChild(TString()); - UNIT_ASSERT_EQUAL(node.IsText(), false); - UNIT_ASSERT_EQUAL(node.Name(), "a"); + UNIT_ASSERT_EQUAL(node.IsText(), false); + UNIT_ASSERT_EQUAL(node.Name(), "a"); node = node.NextSibling(TString()); - UNIT_ASSERT_EQUAL(node.IsText(), false); - UNIT_ASSERT_EQUAL(node.Name(), "c"); - - // use exact "name" to retrieve children and siblings - node = root.FirstChild("a"); - UNIT_ASSERT_EQUAL(node.IsNull(), false); - UNIT_ASSERT_EQUAL(node.Name(), "a"); - node = node.NextSibling("c"); - UNIT_ASSERT_EQUAL(node.IsNull(), false); - UNIT_ASSERT_EQUAL(node.Name(), "c"); - node = root.FirstChild("c"); // skip "a" - UNIT_ASSERT_EQUAL(node.IsNull(), false); - UNIT_ASSERT_EQUAL(node.Name(), "c"); - - // node not found: no exceptions, null nodes are returned - node = root.FirstChild("b"); // b is not direct child of root - UNIT_ASSERT_EQUAL(node.IsNull(), true); - node = root.FirstChild("nosuchnode"); - UNIT_ASSERT_EQUAL(node.IsNull(), true); - node = root.FirstChild(); - node = root.NextSibling("unknownnode"); - UNIT_ASSERT_EQUAL(node.IsNull(), true); - UNIT_ASSERT_EXCEPTION(node.Name(), yexception); + UNIT_ASSERT_EQUAL(node.IsText(), false); + UNIT_ASSERT_EQUAL(node.Name(), "c"); + + // use exact "name" to retrieve children and siblings + node = root.FirstChild("a"); + UNIT_ASSERT_EQUAL(node.IsNull(), false); + UNIT_ASSERT_EQUAL(node.Name(), "a"); + node = node.NextSibling("c"); + UNIT_ASSERT_EQUAL(node.IsNull(), false); + UNIT_ASSERT_EQUAL(node.Name(), "c"); + node = root.FirstChild("c"); // skip "a" + UNIT_ASSERT_EQUAL(node.IsNull(), false); + UNIT_ASSERT_EQUAL(node.Name(), "c"); + + // node not found: no exceptions, null nodes are returned + node = root.FirstChild("b"); // b is not direct child of root + UNIT_ASSERT_EQUAL(node.IsNull(), true); + node = root.FirstChild("nosuchnode"); + UNIT_ASSERT_EQUAL(node.IsNull(), true); + node = root.FirstChild(); + node = root.NextSibling("unknownnode"); + UNIT_ASSERT_EQUAL(node.IsNull(), true); + UNIT_ASSERT_EXCEPTION(node.Name(), yexception); UNIT_ASSERT_EXCEPTION(node.Value<TString>(), yexception); - UNIT_ASSERT_EXCEPTION(node.IsText(), yexception); + UNIT_ASSERT_EXCEPTION(node.IsText(), yexception); } Y_UNIT_TEST(DefVal) { using namespace NXml; @@ -235,49 +235,49 @@ Y_UNIT_TEST_SUITE(TestXmlDocument) { UNIT_ASSERT_VALUES_EQUAL(n.ToString(), "<a><b len=\"15\" correct=\"1\">hello world</b></a>"); } } - + Y_UNIT_TEST(ReuseXPathContext) { - using namespace NXml; - - TDocument xml( - "<?xml version=\"1.0\"?>\n" - "<root>\n" - "<a><b><c>Hello, world!</c></b></a>\n" - "<text x=\"10\">First</text>\n" - "<text y=\"20\">Second</text>\n" - "</root>", - TDocument::String); - - TXPathContextPtr rootCtxt = xml.Root().CreateXPathContext(); - - // Check Node() - TConstNode b = xml.Root().Node("a/b", false, *rootCtxt); - - // We can use root node context for xpath evaluation in any node - TConstNode c1 = b.Node("c", false, *rootCtxt); + using namespace NXml; + + TDocument xml( + "<?xml version=\"1.0\"?>\n" + "<root>\n" + "<a><b><c>Hello, world!</c></b></a>\n" + "<text x=\"10\">First</text>\n" + "<text y=\"20\">Second</text>\n" + "</root>", + TDocument::String); + + TXPathContextPtr rootCtxt = xml.Root().CreateXPathContext(); + + // Check Node() + TConstNode b = xml.Root().Node("a/b", false, *rootCtxt); + + // We can use root node context for xpath evaluation in any node + TConstNode c1 = b.Node("c", false, *rootCtxt); UNIT_ASSERT_EQUAL(c1.Value<TString>(), "Hello, world!"); - - TXPathContextPtr bCtxt = b.CreateXPathContext(); - TConstNode c2 = b.Node("c", false, *bCtxt); + + TXPathContextPtr bCtxt = b.CreateXPathContext(); + TConstNode c2 = b.Node("c", false, *bCtxt); UNIT_ASSERT_EQUAL(c2.Value<TString>(), "Hello, world!"); - - // Mixing contexts from different documents is forbidden - TDocument otherXml("<root></root>", TDocument::String); - TXPathContextPtr otherCtxt = otherXml.Root().CreateXPathContext(); - UNIT_ASSERT_EXCEPTION(b.Node("c", false, *otherCtxt), yexception); - - // Check Nodes() - TConstNodes texts = xml.Root().Nodes("text", true, *rootCtxt); - UNIT_ASSERT_EQUAL(texts.Size(), 2); - - // Nodes() does't work for non-element nodes - UNIT_ASSERT_EXCEPTION(xml.Root().Nodes("text/@x", true, *rootCtxt), yexception); - - // Check XPath() - TConstNodes ys = xml.Root().XPath("text/@y", true, *rootCtxt); - UNIT_ASSERT_EQUAL(ys.Size(), 1); - UNIT_ASSERT_EQUAL(ys[0].Value<int>(), 20); - } + + // Mixing contexts from different documents is forbidden + TDocument otherXml("<root></root>", TDocument::String); + TXPathContextPtr otherCtxt = otherXml.Root().CreateXPathContext(); + UNIT_ASSERT_EXCEPTION(b.Node("c", false, *otherCtxt), yexception); + + // Check Nodes() + TConstNodes texts = xml.Root().Nodes("text", true, *rootCtxt); + UNIT_ASSERT_EQUAL(texts.Size(), 2); + + // Nodes() does't work for non-element nodes + UNIT_ASSERT_EXCEPTION(xml.Root().Nodes("text/@x", true, *rootCtxt), yexception); + + // Check XPath() + TConstNodes ys = xml.Root().XPath("text/@y", true, *rootCtxt); + UNIT_ASSERT_EQUAL(ys.Size(), 1); + UNIT_ASSERT_EQUAL(ys[0].Value<int>(), 20); + } Y_UNIT_TEST(Html) { using namespace NXml; @@ -291,19 +291,19 @@ Y_UNIT_TEST_SUITE(TestXmlDocument) { videoNode.SaveAsHtml(ss); UNIT_ASSERT_EQUAL(ss.Str(), "<video controls></video>"); } - - Y_UNIT_TEST(Move) { - using namespace NXml; - - TDocument xml1("foo", TDocument::RootName); - xml1.Root().AddChild("bar"); - - UNIT_ASSERT_VALUES_EQUAL(xml1.Root().ToString(), "<foo><bar/></foo>"); - - TDocument xml2 = std::move(xml1); - UNIT_ASSERT_EXCEPTION(xml1.Root(), yexception); - UNIT_ASSERT_VALUES_EQUAL(xml2.Root().ToString(), "<foo><bar/></foo>"); - } + + Y_UNIT_TEST(Move) { + using namespace NXml; + + TDocument xml1("foo", TDocument::RootName); + xml1.Root().AddChild("bar"); + + UNIT_ASSERT_VALUES_EQUAL(xml1.Root().ToString(), "<foo><bar/></foo>"); + + TDocument xml2 = std::move(xml1); + UNIT_ASSERT_EXCEPTION(xml1.Root(), yexception); + UNIT_ASSERT_VALUES_EQUAL(xml2.Root().ToString(), "<foo><bar/></foo>"); + } Y_UNIT_TEST(StringConversion) { using namespace NXml; diff --git a/library/cpp/xml/document/xml-textreader.cpp b/library/cpp/xml/document/xml-textreader.cpp index b946f1fbf2..291c1a0f55 100644 --- a/library/cpp/xml/document/xml-textreader.cpp +++ b/library/cpp/xml/document/xml-textreader.cpp @@ -1,318 +1,318 @@ -#include "xml-textreader.h" - -#include <contrib/libs/libxml/include/libxml/xmlreader.h> - -#include <util/generic/yexception.h> -#include <util/string/strip.h> -#include <util/system/compiler.h> - -namespace NXml { +#include "xml-textreader.h" + +#include <contrib/libs/libxml/include/libxml/xmlreader.h> + +#include <util/generic/yexception.h> +#include <util/string/strip.h> +#include <util/system/compiler.h> + +namespace NXml { TTextReader::TTextReader(IInputStream& stream, const TOptions& options) - : Stream(stream) - , IsError(false) - { + : Stream(stream) + , IsError(false) + { Impl.Reset(xmlReaderForIO(ReadFromInputStreamCallback, nullptr, this, nullptr, nullptr, options.GetMask())); - - if (!Impl) { - ythrow yexception() << "cannot instantiate underlying xmlTextReader structure"; - } - SetupErrorHandler(); - CheckForExceptions(); - } - - TTextReader::~TTextReader() { - } - - bool TTextReader::Read() { - return BoolResult(xmlTextReaderRead(Impl.Get())); - } - + + if (!Impl) { + ythrow yexception() << "cannot instantiate underlying xmlTextReader structure"; + } + SetupErrorHandler(); + CheckForExceptions(); + } + + TTextReader::~TTextReader() { + } + + bool TTextReader::Read() { + return BoolResult(xmlTextReaderRead(Impl.Get())); + } + TString TTextReader::ReadInnerXml() const { - return TempStringOrEmptyResult(xmlTextReaderReadInnerXml(Impl.Get())); - } - + return TempStringOrEmptyResult(xmlTextReaderReadInnerXml(Impl.Get())); + } + TString TTextReader::ReadOuterXml() const { - return TempStringOrEmptyResult(xmlTextReaderReadOuterXml(Impl.Get())); - } - + return TempStringOrEmptyResult(xmlTextReaderReadOuterXml(Impl.Get())); + } + TString TTextReader::ReadString() const { - return TempStringOrEmptyResult(xmlTextReaderReadString(Impl.Get())); - } - - bool TTextReader::ReadAttributeValue() const { - return BoolResult(xmlTextReaderReadAttributeValue(Impl.Get())); - } - - int TTextReader::GetAttributeCount() const { - return IntResult(xmlTextReaderAttributeCount(Impl.Get())); - } - - TStringBuf TTextReader::GetBaseUri() const { - return ConstStringOrEmptyResult(xmlTextReaderConstBaseUri(Impl.Get())); - } - - int TTextReader::GetDepth() const { - return IntResult(xmlTextReaderDepth(Impl.Get())); - } - - bool TTextReader::HasAttributes() const { - return BoolResult(xmlTextReaderHasAttributes(Impl.Get())); - } - - bool TTextReader::HasValue() const { - return BoolResult(xmlTextReaderHasValue(Impl.Get())); - } - - bool TTextReader::IsDefault() const { - return BoolResult(xmlTextReaderIsDefault(Impl.Get())); - } - - bool TTextReader::IsEmptyElement() const { - return BoolResult(xmlTextReaderIsEmptyElement(Impl.Get())); - } - - TStringBuf TTextReader::GetLocalName() const { - return ConstStringOrEmptyResult(xmlTextReaderConstLocalName(Impl.Get())); - } - - TStringBuf TTextReader::GetName() const { - return ConstStringOrEmptyResult(xmlTextReaderConstName(Impl.Get())); - } - - TStringBuf TTextReader::GetNamespaceUri() const { - return ConstStringOrEmptyResult(xmlTextReaderConstNamespaceUri(Impl.Get())); - } - - TTextReader::ENodeType TTextReader::GetNodeType() const { - return static_cast<ENodeType>(IntResult(xmlTextReaderNodeType(Impl.Get()))); - } - - TStringBuf TTextReader::GetPrefix() const { - return ConstStringOrEmptyResult(xmlTextReaderConstPrefix(Impl.Get())); - } - - char TTextReader::GetQuoteChar() const { - return CharResult(xmlTextReaderQuoteChar(Impl.Get())); - } - - TStringBuf TTextReader::GetValue() const { - return ConstStringOrEmptyResult(xmlTextReaderConstValue(Impl.Get())); - } - - TTextReader::EReadState TTextReader::GetReadState() const { - return static_cast<EReadState>(IntResult(xmlTextReaderReadState(Impl.Get()))); - } - - void TTextReader::Close() { - if (xmlTextReaderClose(Impl.Get()) == -1) { - ThrowException(); - } - } - + return TempStringOrEmptyResult(xmlTextReaderReadString(Impl.Get())); + } + + bool TTextReader::ReadAttributeValue() const { + return BoolResult(xmlTextReaderReadAttributeValue(Impl.Get())); + } + + int TTextReader::GetAttributeCount() const { + return IntResult(xmlTextReaderAttributeCount(Impl.Get())); + } + + TStringBuf TTextReader::GetBaseUri() const { + return ConstStringOrEmptyResult(xmlTextReaderConstBaseUri(Impl.Get())); + } + + int TTextReader::GetDepth() const { + return IntResult(xmlTextReaderDepth(Impl.Get())); + } + + bool TTextReader::HasAttributes() const { + return BoolResult(xmlTextReaderHasAttributes(Impl.Get())); + } + + bool TTextReader::HasValue() const { + return BoolResult(xmlTextReaderHasValue(Impl.Get())); + } + + bool TTextReader::IsDefault() const { + return BoolResult(xmlTextReaderIsDefault(Impl.Get())); + } + + bool TTextReader::IsEmptyElement() const { + return BoolResult(xmlTextReaderIsEmptyElement(Impl.Get())); + } + + TStringBuf TTextReader::GetLocalName() const { + return ConstStringOrEmptyResult(xmlTextReaderConstLocalName(Impl.Get())); + } + + TStringBuf TTextReader::GetName() const { + return ConstStringOrEmptyResult(xmlTextReaderConstName(Impl.Get())); + } + + TStringBuf TTextReader::GetNamespaceUri() const { + return ConstStringOrEmptyResult(xmlTextReaderConstNamespaceUri(Impl.Get())); + } + + TTextReader::ENodeType TTextReader::GetNodeType() const { + return static_cast<ENodeType>(IntResult(xmlTextReaderNodeType(Impl.Get()))); + } + + TStringBuf TTextReader::GetPrefix() const { + return ConstStringOrEmptyResult(xmlTextReaderConstPrefix(Impl.Get())); + } + + char TTextReader::GetQuoteChar() const { + return CharResult(xmlTextReaderQuoteChar(Impl.Get())); + } + + TStringBuf TTextReader::GetValue() const { + return ConstStringOrEmptyResult(xmlTextReaderConstValue(Impl.Get())); + } + + TTextReader::EReadState TTextReader::GetReadState() const { + return static_cast<EReadState>(IntResult(xmlTextReaderReadState(Impl.Get()))); + } + + void TTextReader::Close() { + if (xmlTextReaderClose(Impl.Get()) == -1) { + ThrowException(); + } + } + TString TTextReader::GetAttribute(int number) const { - return TempStringResult(xmlTextReaderGetAttributeNo(Impl.Get(), number)); - } - + return TempStringResult(xmlTextReaderGetAttributeNo(Impl.Get(), number)); + } + TString TTextReader::GetAttribute(TZtStringBuf name) const { return TempStringResult(xmlTextReaderGetAttribute(Impl.Get(), XMLCHAR(name.data()))); - } - + } + TString TTextReader::GetAttribute(TZtStringBuf localName, TZtStringBuf nsUri) const { return TempStringResult(xmlTextReaderGetAttributeNs(Impl.Get(), XMLCHAR(localName.data()), XMLCHAR(nsUri.data()))); - } - + } + TString TTextReader::LookupNamespace(TZtStringBuf prefix) const { return TempStringResult(xmlTextReaderLookupNamespace(Impl.Get(), XMLCHAR(prefix.data()))); - } - - bool TTextReader::MoveToAttribute(int number) { - return BoolResult(xmlTextReaderMoveToAttributeNo(Impl.Get(), number)); - } - + } + + bool TTextReader::MoveToAttribute(int number) { + return BoolResult(xmlTextReaderMoveToAttributeNo(Impl.Get(), number)); + } + bool TTextReader::MoveToAttribute(TZtStringBuf name) { return BoolResult(xmlTextReaderMoveToAttribute(Impl.Get(), XMLCHAR(name.data()))); - } - + } + bool TTextReader::MoveToAttribute(TZtStringBuf localName, TZtStringBuf nsUri) { return BoolResult(xmlTextReaderMoveToAttributeNs(Impl.Get(), XMLCHAR(localName.data()), XMLCHAR(nsUri.data()))); - } - - bool TTextReader::MoveToFirstAttribute() { - return BoolResult(xmlTextReaderMoveToFirstAttribute(Impl.Get())); - } - - bool TTextReader::MoveToNextAttribute() { - return BoolResult(xmlTextReaderMoveToNextAttribute(Impl.Get())); - } - - bool TTextReader::MoveToElement() { - return BoolResult(xmlTextReaderMoveToElement(Impl.Get())); - } - - TConstNode TTextReader::Expand() const { - const xmlNodePtr node = xmlTextReaderExpand(Impl.Get()); - if (node == nullptr) { - ThrowException(); - } - return TConstNode(TNode(node->doc, node)); - } - - bool TTextReader::Next() { - return BoolResult(xmlTextReaderNext(Impl.Get())); - } - - bool TTextReader::IsValid() const { - return BoolResult(xmlTextReaderIsValid(Impl.Get())); - } - - // Callback for xmlReaderForIO() to read more data. + } + + bool TTextReader::MoveToFirstAttribute() { + return BoolResult(xmlTextReaderMoveToFirstAttribute(Impl.Get())); + } + + bool TTextReader::MoveToNextAttribute() { + return BoolResult(xmlTextReaderMoveToNextAttribute(Impl.Get())); + } + + bool TTextReader::MoveToElement() { + return BoolResult(xmlTextReaderMoveToElement(Impl.Get())); + } + + TConstNode TTextReader::Expand() const { + const xmlNodePtr node = xmlTextReaderExpand(Impl.Get()); + if (node == nullptr) { + ThrowException(); + } + return TConstNode(TNode(node->doc, node)); + } + + bool TTextReader::Next() { + return BoolResult(xmlTextReaderNext(Impl.Get())); + } + + bool TTextReader::IsValid() const { + return BoolResult(xmlTextReaderIsValid(Impl.Get())); + } + + // Callback for xmlReaderForIO() to read more data. // It is almost "noexcept" (std::bad_alloc may happen when saving exception message to new TString). - // Waiting for std::exception_ptr and std::rethrow_exception from C++11 in Arcadia to make it really "noexcept". - int TTextReader::ReadFromInputStreamCallback(void* context, char* buffer, int len) { + // Waiting for std::exception_ptr and std::rethrow_exception from C++11 in Arcadia to make it really "noexcept". + int TTextReader::ReadFromInputStreamCallback(void* context, char* buffer, int len) { Y_ASSERT(len >= 0); - TTextReader* reader = static_cast<TTextReader*>(context); - - int result = -1; - + TTextReader* reader = static_cast<TTextReader*>(context); + + int result = -1; + // Exception may be thrown by IInputStream::Read(). - // It is caught unconditionally because exceptions cannot safely pass through libxml2 plain C code - // (no destructors, no RAII, raw pointers, so in case of stack unwinding some memory gets leaked). - - try { - result = reader->Stream.Read(buffer, len); - } catch (const yexception& ex) { - reader->LogError() << "read from input stream failed: " << ex; - } catch (...) { - reader->LogError() << "read from input stream failed"; - } - - return result; - } - - void TTextReader::OnLibxmlError(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator) { - TTextReader* reader = static_cast<TTextReader*>(arg); + // It is caught unconditionally because exceptions cannot safely pass through libxml2 plain C code + // (no destructors, no RAII, raw pointers, so in case of stack unwinding some memory gets leaked). + + try { + result = reader->Stream.Read(buffer, len); + } catch (const yexception& ex) { + reader->LogError() << "read from input stream failed: " << ex; + } catch (...) { + reader->LogError() << "read from input stream failed"; + } + + return result; + } + + void TTextReader::OnLibxmlError(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator) { + TTextReader* reader = static_cast<TTextReader*>(arg); Y_ASSERT(reader != nullptr); - - TStringStream& out = reader->LogError(); - - if (severity == XML_PARSER_SEVERITY_ERROR) { - out << "libxml parse error"; - } else if (severity == XML_PARSER_SEVERITY_VALIDITY_ERROR) { - out << "libxml validity error"; - } else { - out << "libxml error"; - } - - if (locator != nullptr) { - const int line = xmlTextReaderLocatorLineNumber(locator); - const TCharPtr baseUri = xmlTextReaderLocatorBaseURI(locator); - out << " ("; - if (line != -1) { - out << "at line " << line; - if (baseUri) { - out << ", "; - } - } - if (baseUri) { - out << "base URI " << CAST2CHAR(baseUri.Get()); - } - out << ")"; - } - - TStringBuf message = (msg != nullptr) ? msg : "unknown"; - message = StripStringRight(message); // remove trailing \n that is added by libxml - if (!message.empty()) { - out << ": " << message; - } - } - - void TTextReader::SetupErrorHandler() { - xmlTextReaderErrorFunc func = nullptr; - void* arg = nullptr; - - // We respect any other error handlers already set up: - xmlTextReaderGetErrorHandler(Impl.Get(), &func, &arg); - if (!func) { - func = TTextReader::OnLibxmlError; - xmlTextReaderSetErrorHandler(Impl.Get(), func, this); - } - } - - TStringStream& TTextReader::LogError() const { - if (IsError) { // maybe there are previous errors - ErrorBuffer << Endl; - } - IsError = true; - return ErrorBuffer; - } - - void TTextReader::CheckForExceptions() const { + + TStringStream& out = reader->LogError(); + + if (severity == XML_PARSER_SEVERITY_ERROR) { + out << "libxml parse error"; + } else if (severity == XML_PARSER_SEVERITY_VALIDITY_ERROR) { + out << "libxml validity error"; + } else { + out << "libxml error"; + } + + if (locator != nullptr) { + const int line = xmlTextReaderLocatorLineNumber(locator); + const TCharPtr baseUri = xmlTextReaderLocatorBaseURI(locator); + out << " ("; + if (line != -1) { + out << "at line " << line; + if (baseUri) { + out << ", "; + } + } + if (baseUri) { + out << "base URI " << CAST2CHAR(baseUri.Get()); + } + out << ")"; + } + + TStringBuf message = (msg != nullptr) ? msg : "unknown"; + message = StripStringRight(message); // remove trailing \n that is added by libxml + if (!message.empty()) { + out << ": " << message; + } + } + + void TTextReader::SetupErrorHandler() { + xmlTextReaderErrorFunc func = nullptr; + void* arg = nullptr; + + // We respect any other error handlers already set up: + xmlTextReaderGetErrorHandler(Impl.Get(), &func, &arg); + if (!func) { + func = TTextReader::OnLibxmlError; + xmlTextReaderSetErrorHandler(Impl.Get(), func, this); + } + } + + TStringStream& TTextReader::LogError() const { + if (IsError) { // maybe there are previous errors + ErrorBuffer << Endl; + } + IsError = true; + return ErrorBuffer; + } + + void TTextReader::CheckForExceptions() const { if (Y_LIKELY(!IsError)) { - return; - } - + return; + } + const TString message = ErrorBuffer.Str(); - ErrorBuffer.clear(); - IsError = false; - - ythrow yexception() << message; - } - - void TTextReader::ThrowException() const { - CheckForExceptions(); - // Probably CheckForExceptions() would throw an exception with more verbose message. As the last resort - // (we do not even know the name of the failed libxml function, but it's possible to deduce it from stacktrace): - ythrow yexception() << "libxml function returned error exit code"; - } - - bool TTextReader::BoolResult(int value) const { + ErrorBuffer.clear(); + IsError = false; + + ythrow yexception() << message; + } + + void TTextReader::ThrowException() const { + CheckForExceptions(); + // Probably CheckForExceptions() would throw an exception with more verbose message. As the last resort + // (we do not even know the name of the failed libxml function, but it's possible to deduce it from stacktrace): + ythrow yexception() << "libxml function returned error exit code"; + } + + bool TTextReader::BoolResult(int value) const { if (Y_UNLIKELY(value == -1)) { - ThrowException(); - } - return (value != 0); - } - - int TTextReader::IntResult(int value) const { + ThrowException(); + } + return (value != 0); + } + + int TTextReader::IntResult(int value) const { if (Y_UNLIKELY(value == -1)) { - ThrowException(); - } - return value; - } - - char TTextReader::CharResult(int value) const { + ThrowException(); + } + return value; + } + + char TTextReader::CharResult(int value) const { if (Y_UNLIKELY(value == -1)) { - ThrowException(); - } - return static_cast<char>(value); - } - - TStringBuf TTextReader::ConstStringResult(const xmlChar* value) const { + ThrowException(); + } + return static_cast<char>(value); + } + + TStringBuf TTextReader::ConstStringResult(const xmlChar* value) const { if (Y_UNLIKELY(value == nullptr)) { - ThrowException(); - } - return CAST2CHAR(value); - } - - TStringBuf TTextReader::ConstStringOrEmptyResult(const xmlChar* value) const { - CheckForExceptions(); - return (value != nullptr) ? TStringBuf(CAST2CHAR(value)) : TStringBuf(); - } - + ThrowException(); + } + return CAST2CHAR(value); + } + + TStringBuf TTextReader::ConstStringOrEmptyResult(const xmlChar* value) const { + CheckForExceptions(); + return (value != nullptr) ? TStringBuf(CAST2CHAR(value)) : TStringBuf(); + } + TString TTextReader::TempStringResult(TCharPtr value) const { if (Y_UNLIKELY(value == nullptr)) { - ThrowException(); - } + ThrowException(); + } return TString(CAST2CHAR(value.Get())); - } - + } + TString TTextReader::TempStringOrEmptyResult(TCharPtr value) const { - CheckForExceptions(); + CheckForExceptions(); return (value != nullptr) ? TString(CAST2CHAR(value.Get())) : TString(); - } - - struct TTextReader::TDeleter { - static inline void Destroy(xmlTextReaderPtr handle) { - xmlFreeTextReader(handle); - } - }; + } + + struct TTextReader::TDeleter { + static inline void Destroy(xmlTextReaderPtr handle) { + xmlFreeTextReader(handle); + } + }; } diff --git a/library/cpp/xml/document/xml-textreader.h b/library/cpp/xml/document/xml-textreader.h index ab4c329d26..9e0b8be6ea 100644 --- a/library/cpp/xml/document/xml-textreader.h +++ b/library/cpp/xml/document/xml-textreader.h @@ -1,325 +1,325 @@ -#pragma once - -#include "xml-document.h" +#pragma once + +#include "xml-document.h" #include "xml-options.h" - -#include <contrib/libs/libxml/include/libxml/xmlreader.h> - + +#include <contrib/libs/libxml/include/libxml/xmlreader.h> + #include <library/cpp/string_utils/ztstrbuf/ztstrbuf.h> -#include <util/generic/noncopyable.h> -#include <util/generic/ptr.h> -#include <util/generic/strbuf.h> +#include <util/generic/noncopyable.h> +#include <util/generic/ptr.h> +#include <util/generic/strbuf.h> #include <util/generic/string.h> #include <functional> -#include <util/stream/input.h> -#include <util/stream/str.h> - -namespace NXml { - /** - * TextReader Parser - * - * API of the XML streaming API based on C# interfaces. - * Provides fast, non-cached, forward-only access to XML data. - * - * Like the SAX parser, the TextReader parser is suitable for sequential - * parsing, but instead of implementing handlers for specific parts of the - * document, it allows you to detect the current node type, process the node - * accordingly, and skip forward in the document as much as necessary. - * - * Unlike the DOM parser, you may not move backwards in the XML document. - * And unlike the SAX parser, you must not waste time processing nodes that do not - * interest you. - * - * All methods are on the single parser instance, but their result depends on the current context. - * For instance, use Read() to move to the next node, and MoveToElement() to navigate to child nodes. - * These methods will return false when no more nodes are available. Then use - * methods such as GetName() and GetValue() to examine the elements and their attributes. - * - * This wrapper is inspired by TextReader from libxml++. - */ - - class TTextReader: private TNonCopyable { - public: - // strongly-typed alias for enum from xmlreader.h - enum class ENodeType : int { - // clang-format off - Attribute = XML_READER_TYPE_ATTRIBUTE, - CDATA = XML_READER_TYPE_CDATA, - Comment = XML_READER_TYPE_COMMENT, - Document = XML_READER_TYPE_DOCUMENT, - DocumentFragment = XML_READER_TYPE_DOCUMENT_FRAGMENT, - DocumentType = XML_READER_TYPE_DOCUMENT_TYPE, - Element = XML_READER_TYPE_ELEMENT, - EndElement = XML_READER_TYPE_END_ELEMENT, - EndEntity = XML_READER_TYPE_END_ENTITY, - Entity = XML_READER_TYPE_ENTITY, - EntityReference = XML_READER_TYPE_ENTITY_REFERENCE, - None = XML_READER_TYPE_NONE, - Notation = XML_READER_TYPE_NOTATION, - ProcessingInstruction = XML_READER_TYPE_PROCESSING_INSTRUCTION, - SignificantWhitespace = XML_READER_TYPE_SIGNIFICANT_WHITESPACE, - Text = XML_READER_TYPE_TEXT, - Whitespace = XML_READER_TYPE_WHITESPACE, - XmlDeclaration = XML_READER_TYPE_XML_DECLARATION, - // clang-format on - }; - - enum class EReadState : int { - // clang-format off - Closed = XML_TEXTREADER_MODE_CLOSED, - EndOfFile = XML_TEXTREADER_MODE_EOF, - Error = XML_TEXTREADER_MODE_ERROR, - Initial = XML_TEXTREADER_MODE_INITIAL, - Interactive = XML_TEXTREADER_MODE_INTERACTIVE, - Reading = XML_TEXTREADER_MODE_READING, - // clang-format on - }; - - public: +#include <util/stream/input.h> +#include <util/stream/str.h> + +namespace NXml { + /** + * TextReader Parser + * + * API of the XML streaming API based on C# interfaces. + * Provides fast, non-cached, forward-only access to XML data. + * + * Like the SAX parser, the TextReader parser is suitable for sequential + * parsing, but instead of implementing handlers for specific parts of the + * document, it allows you to detect the current node type, process the node + * accordingly, and skip forward in the document as much as necessary. + * + * Unlike the DOM parser, you may not move backwards in the XML document. + * And unlike the SAX parser, you must not waste time processing nodes that do not + * interest you. + * + * All methods are on the single parser instance, but their result depends on the current context. + * For instance, use Read() to move to the next node, and MoveToElement() to navigate to child nodes. + * These methods will return false when no more nodes are available. Then use + * methods such as GetName() and GetValue() to examine the elements and their attributes. + * + * This wrapper is inspired by TextReader from libxml++. + */ + + class TTextReader: private TNonCopyable { + public: + // strongly-typed alias for enum from xmlreader.h + enum class ENodeType : int { + // clang-format off + Attribute = XML_READER_TYPE_ATTRIBUTE, + CDATA = XML_READER_TYPE_CDATA, + Comment = XML_READER_TYPE_COMMENT, + Document = XML_READER_TYPE_DOCUMENT, + DocumentFragment = XML_READER_TYPE_DOCUMENT_FRAGMENT, + DocumentType = XML_READER_TYPE_DOCUMENT_TYPE, + Element = XML_READER_TYPE_ELEMENT, + EndElement = XML_READER_TYPE_END_ELEMENT, + EndEntity = XML_READER_TYPE_END_ENTITY, + Entity = XML_READER_TYPE_ENTITY, + EntityReference = XML_READER_TYPE_ENTITY_REFERENCE, + None = XML_READER_TYPE_NONE, + Notation = XML_READER_TYPE_NOTATION, + ProcessingInstruction = XML_READER_TYPE_PROCESSING_INSTRUCTION, + SignificantWhitespace = XML_READER_TYPE_SIGNIFICANT_WHITESPACE, + Text = XML_READER_TYPE_TEXT, + Whitespace = XML_READER_TYPE_WHITESPACE, + XmlDeclaration = XML_READER_TYPE_XML_DECLARATION, + // clang-format on + }; + + enum class EReadState : int { + // clang-format off + Closed = XML_TEXTREADER_MODE_CLOSED, + EndOfFile = XML_TEXTREADER_MODE_EOF, + Error = XML_TEXTREADER_MODE_ERROR, + Initial = XML_TEXTREADER_MODE_INITIAL, + Interactive = XML_TEXTREADER_MODE_INTERACTIVE, + Reading = XML_TEXTREADER_MODE_READING, + // clang-format on + }; + + public: TTextReader(IInputStream& stream, const TOptions& options = TOptions()); - ~TTextReader(); - - /** - * Moves the position of the current instance to the next node in the stream, exposing its properties. - * @return true if the node was read successfully, false if there are no more nodes to read - */ - bool Read(); - - /** - * Reads the contents of the current node, including child nodes and markup. - * @return A string containing the XML content, or an empty string - * if the current node is neither an element nor attribute, or has no child nodes - */ + ~TTextReader(); + + /** + * Moves the position of the current instance to the next node in the stream, exposing its properties. + * @return true if the node was read successfully, false if there are no more nodes to read + */ + bool Read(); + + /** + * Reads the contents of the current node, including child nodes and markup. + * @return A string containing the XML content, or an empty string + * if the current node is neither an element nor attribute, or has no child nodes + */ TString ReadInnerXml() const; - - /** - * Reads the current node and its contents, including child nodes and markup. - * @return A string containing the XML content, or an empty string - * if the current node is neither an element nor attribute - */ + + /** + * Reads the current node and its contents, including child nodes and markup. + * @return A string containing the XML content, or an empty string + * if the current node is neither an element nor attribute + */ TString ReadOuterXml() const; - - /** - * Reads the contents of an element or a text node as a string. - * @return A string containing the contents of the Element or Text node, - * or an empty string if the reader is positioned on any other type of node - */ + + /** + * Reads the contents of an element or a text node as a string. + * @return A string containing the contents of the Element or Text node, + * or an empty string if the reader is positioned on any other type of node + */ TString ReadString() const; - - /** - * Parses an attribute value into one or more Text and EntityReference nodes. - * @return A bool where true indicates the attribute value was parsed, - * and false indicates the reader was not positioned on an attribute node - * or all the attribute values have been read - */ - bool ReadAttributeValue() const; - - /** - * Gets the number of attributes on the current node. - * @return The number of attributes on the current node, or zero if the current node - * does not support attributes - */ - int GetAttributeCount() const; - - /** - * Gets the base Uniform Resource Identifier (URI) of the current node. - * @return The base URI of the current node or an empty string if not available - */ - TStringBuf GetBaseUri() const; - - /** - * Gets the depth of the current node in the XML document. - * @return The depth of the current node in the XML document - */ - int GetDepth() const; - - /** - * Gets a value indicating whether the current node has any attributes. - * @return true if the current has attributes, false otherwise - */ - bool HasAttributes() const; - - /** - * Whether the node can have a text value. - * @return true if the current node can have an associated text value, false otherwise - */ - bool HasValue() const; - - /** - * Whether an Attribute node was generated from the default value defined in the DTD or schema. - * @return true if defaulted, false otherwise - */ - bool IsDefault() const; - - /** - * Check if the current node is empty. - * @return true if empty, false otherwise - */ - bool IsEmptyElement() const; - - /** - * The local name of the node. - * @return the local name or empty string if not available - */ - TStringBuf GetLocalName() const; - - /** - * The qualified name of the node, equal to Prefix:LocalName. - * @return the name or empty string if not available - */ - TStringBuf GetName() const; - - /** - * The URI defining the namespace associated with the node. - * @return the namespace URI or empty string if not available - */ - TStringBuf GetNamespaceUri() const; - - /** - * Get the node type of the current node. - * @return the ENodeType of the current node - */ - ENodeType GetNodeType() const; - - /** - * Get the namespace prefix associated with the current node. - * @return the namespace prefix, or an empty string if not available - */ - TStringBuf GetPrefix() const; - - /** - * Get the quotation mark character used to enclose the value of an attribute. - * @return " or ' - */ - char GetQuoteChar() const; - - /** - * Provides the text value of the node if present. - * @return the string or empty if not available - */ - TStringBuf GetValue() const; - - /** - * Gets the read state of the reader. - * @return the state value - */ - EReadState GetReadState() const; - - /** - * This method releases any resources allocated by the current instance - * changes the state to Closed and close any underlying input. - */ - void Close(); - - /** - * Provides the value of the attribute with the specified index relative to the containing element. - * @param number the zero-based index of the attribute relative to the containing element - */ + + /** + * Parses an attribute value into one or more Text and EntityReference nodes. + * @return A bool where true indicates the attribute value was parsed, + * and false indicates the reader was not positioned on an attribute node + * or all the attribute values have been read + */ + bool ReadAttributeValue() const; + + /** + * Gets the number of attributes on the current node. + * @return The number of attributes on the current node, or zero if the current node + * does not support attributes + */ + int GetAttributeCount() const; + + /** + * Gets the base Uniform Resource Identifier (URI) of the current node. + * @return The base URI of the current node or an empty string if not available + */ + TStringBuf GetBaseUri() const; + + /** + * Gets the depth of the current node in the XML document. + * @return The depth of the current node in the XML document + */ + int GetDepth() const; + + /** + * Gets a value indicating whether the current node has any attributes. + * @return true if the current has attributes, false otherwise + */ + bool HasAttributes() const; + + /** + * Whether the node can have a text value. + * @return true if the current node can have an associated text value, false otherwise + */ + bool HasValue() const; + + /** + * Whether an Attribute node was generated from the default value defined in the DTD or schema. + * @return true if defaulted, false otherwise + */ + bool IsDefault() const; + + /** + * Check if the current node is empty. + * @return true if empty, false otherwise + */ + bool IsEmptyElement() const; + + /** + * The local name of the node. + * @return the local name or empty string if not available + */ + TStringBuf GetLocalName() const; + + /** + * The qualified name of the node, equal to Prefix:LocalName. + * @return the name or empty string if not available + */ + TStringBuf GetName() const; + + /** + * The URI defining the namespace associated with the node. + * @return the namespace URI or empty string if not available + */ + TStringBuf GetNamespaceUri() const; + + /** + * Get the node type of the current node. + * @return the ENodeType of the current node + */ + ENodeType GetNodeType() const; + + /** + * Get the namespace prefix associated with the current node. + * @return the namespace prefix, or an empty string if not available + */ + TStringBuf GetPrefix() const; + + /** + * Get the quotation mark character used to enclose the value of an attribute. + * @return " or ' + */ + char GetQuoteChar() const; + + /** + * Provides the text value of the node if present. + * @return the string or empty if not available + */ + TStringBuf GetValue() const; + + /** + * Gets the read state of the reader. + * @return the state value + */ + EReadState GetReadState() const; + + /** + * This method releases any resources allocated by the current instance + * changes the state to Closed and close any underlying input. + */ + void Close(); + + /** + * Provides the value of the attribute with the specified index relative to the containing element. + * @param number the zero-based index of the attribute relative to the containing element + */ TString GetAttribute(int number) const; - - /** - * Provides the value of the attribute with the specified qualified name. - * @param name the qualified name of the attribute - */ + + /** + * Provides the value of the attribute with the specified qualified name. + * @param name the qualified name of the attribute + */ TString GetAttribute(TZtStringBuf name) const; - - /** - * Provides the value of the specified attribute. - * @param localName the local name of the attribute - * @param nsUri the namespace URI of the attribute - */ + + /** + * Provides the value of the specified attribute. + * @param localName the local name of the attribute + * @param nsUri the namespace URI of the attribute + */ TString GetAttribute(TZtStringBuf localName, TZtStringBuf nsUri) const; - - /** - * Resolves a namespace prefix in the scope of the current element. - * @param prefix the prefix whose namespace URI is to be resolved. To return the default namespace, specify empty string. - * @return a string containing the namespace URI to which the prefix maps. - */ + + /** + * Resolves a namespace prefix in the scope of the current element. + * @param prefix the prefix whose namespace URI is to be resolved. To return the default namespace, specify empty string. + * @return a string containing the namespace URI to which the prefix maps. + */ TString LookupNamespace(TZtStringBuf prefix) const; - - /** - * Moves the position of the current instance to the attribute with the specified index relative to the containing element. - * @param number the zero-based index of the attribute relative to the containing element - * @return true in case of success, false if not found - */ - bool MoveToAttribute(int number); - - /** - * Moves the position of the current instance to the attribute with the specified qualified name. - * @param name the qualified name of the attribute - * @return true in case of success, false if not found - */ + + /** + * Moves the position of the current instance to the attribute with the specified index relative to the containing element. + * @param number the zero-based index of the attribute relative to the containing element + * @return true in case of success, false if not found + */ + bool MoveToAttribute(int number); + + /** + * Moves the position of the current instance to the attribute with the specified qualified name. + * @param name the qualified name of the attribute + * @return true in case of success, false if not found + */ bool MoveToAttribute(TZtStringBuf name); - - /** - * Moves the position of the current instance to the attribute with the specified local name and namespace URI. - * @param localName the local name of the attribute - * @param nsUri the namespace URI of the attribute - * @return true in case of success, false if not found - */ + + /** + * Moves the position of the current instance to the attribute with the specified local name and namespace URI. + * @param localName the local name of the attribute + * @param nsUri the namespace URI of the attribute + * @return true in case of success, false if not found + */ bool MoveToAttribute(TZtStringBuf localName, TZtStringBuf nsUri); - - /** - * Moves the position of the current instance to the first attribute associated with the current node. - * @return true in case of success, false if not found - */ - bool MoveToFirstAttribute(); - - /** - * Moves the position of the current instance to the next attribute associated with the current node. - * @return true in case of success, false if not found - */ - bool MoveToNextAttribute(); - - /** - * Moves the position of the current instance to the node that contains the current Attribute node. - * @return true in case of success, false if not found - */ - bool MoveToElement(); - - /** - * Reads the contents of the current node and the full subtree. It then makes the subtree available until the next Read() call. - */ - TConstNode Expand() const; - - /** - * Skip to the node following the current one in document order while avoiding the subtree if any. - * @return true if the node was read successfully, false if there is no more nodes to read - */ - bool Next(); - - /** - * Retrieve the validity status from the parser context. - */ - bool IsValid() const; - - private: - static int ReadFromInputStreamCallback(void* context, char* buffer, int len); - static void OnLibxmlError(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator); - - void SetupErrorHandler(); - TStringStream& LogError() const; - void CheckForExceptions() const; - void ThrowException() const; - - // helpers that check return codes of C functions from libxml - bool BoolResult(int value) const; - int IntResult(int value) const; - char CharResult(int value) const; - TStringBuf ConstStringResult(const xmlChar* value) const; - TStringBuf ConstStringOrEmptyResult(const xmlChar* value) const; + + /** + * Moves the position of the current instance to the first attribute associated with the current node. + * @return true in case of success, false if not found + */ + bool MoveToFirstAttribute(); + + /** + * Moves the position of the current instance to the next attribute associated with the current node. + * @return true in case of success, false if not found + */ + bool MoveToNextAttribute(); + + /** + * Moves the position of the current instance to the node that contains the current Attribute node. + * @return true in case of success, false if not found + */ + bool MoveToElement(); + + /** + * Reads the contents of the current node and the full subtree. It then makes the subtree available until the next Read() call. + */ + TConstNode Expand() const; + + /** + * Skip to the node following the current one in document order while avoiding the subtree if any. + * @return true if the node was read successfully, false if there is no more nodes to read + */ + bool Next(); + + /** + * Retrieve the validity status from the parser context. + */ + bool IsValid() const; + + private: + static int ReadFromInputStreamCallback(void* context, char* buffer, int len); + static void OnLibxmlError(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator); + + void SetupErrorHandler(); + TStringStream& LogError() const; + void CheckForExceptions() const; + void ThrowException() const; + + // helpers that check return codes of C functions from libxml + bool BoolResult(int value) const; + int IntResult(int value) const; + char CharResult(int value) const; + TStringBuf ConstStringResult(const xmlChar* value) const; + TStringBuf ConstStringOrEmptyResult(const xmlChar* value) const; TString TempStringResult(TCharPtr value) const; TString TempStringOrEmptyResult(TCharPtr value) const; - - private: + + private: IInputStream& Stream; - - mutable bool IsError; - mutable TStringStream ErrorBuffer; - - struct TDeleter; - THolder<xmlTextReader, TDeleter> Impl; - }; - + + mutable bool IsError; + mutable TStringStream ErrorBuffer; + + struct TDeleter; + THolder<xmlTextReader, TDeleter> Impl; + }; + } diff --git a/library/cpp/xml/document/xml-textreader_ut.cpp b/library/cpp/xml/document/xml-textreader_ut.cpp index 6232dfe47e..9f54523fef 100644 --- a/library/cpp/xml/document/xml-textreader_ut.cpp +++ b/library/cpp/xml/document/xml-textreader_ut.cpp @@ -1,34 +1,34 @@ -#include "xml-textreader.h" - +#include "xml-textreader.h" + #include <library/cpp/testing/unittest/registar.h> - -#include <util/generic/hash.h> -#include <util/generic/vector.h> -#include <util/string/join.h> - -namespace { - /** - * Simple wrapper around the xmlTextReader wrapper - */ + +#include <util/generic/hash.h> +#include <util/generic/vector.h> +#include <util/string/join.h> + +namespace { + /** + * Simple wrapper around the xmlTextReader wrapper + */ void ParseXml(const TString& xmlData, std::function<void(NXml::TConstNode)> nodeHandlerFunc, const TString& localName, const TString& namespaceUri = TString()) { - TStringInput in(xmlData); - NXml::TTextReader reader(in); - - while (reader.Read()) { - if (reader.GetNodeType() == NXml::TTextReader::ENodeType::Element && - reader.GetLocalName() == localName && + TStringInput in(xmlData); + NXml::TTextReader reader(in); + + while (reader.Read()) { + if (reader.GetNodeType() == NXml::TTextReader::ENodeType::Element && + reader.GetLocalName() == localName && reader.GetNamespaceUri() == namespaceUri) { - const NXml::TConstNode node = reader.Expand(); - nodeHandlerFunc(node); - } - } - } + const NXml::TConstNode node = reader.Expand(); + nodeHandlerFunc(node); + } + } + } } - + Y_UNIT_TEST_SUITE(TestXmlTextReader) { Y_UNIT_TEST(BasicExample) { const TString xml = "<?xml version=\"1.0\"?>\n" @@ -40,73 +40,73 @@ Y_UNIT_TEST_SUITE(TestXmlTextReader) { " <child_of_child>Some content : -)</child_of_child>\n" " </examplechild>\n" "</example>\n"; - - TStringInput input(xml); - NXml::TTextReader reader(input); - - using ENT = NXml::TTextReader::ENodeType; - - struct TItem { - int Depth; - ENT Type; + + TStringInput input(xml); + NXml::TTextReader reader(input); + + using ENT = NXml::TTextReader::ENodeType; + + struct TItem { + int Depth; + ENT Type; TString Name; TString Attrs; TString Value; - }; - + }; + TVector<TItem> found; TVector<TString> msgs; - - while (reader.Read()) { - // dump attributes as "k1: v1, k2: v2, ..." + + while (reader.Read()) { + // dump attributes as "k1: v1, k2: v2, ..." TVector<TString> kv; - if (reader.HasAttributes()) { - reader.MoveToFirstAttribute(); - do { + if (reader.HasAttributes()) { + reader.MoveToFirstAttribute(); + do { kv.push_back(TString::Join(reader.GetName(), ": ", reader.GetValue())); - } while (reader.MoveToNextAttribute()); - reader.MoveToElement(); - } - - found.push_back(TItem{ - reader.GetDepth(), - reader.GetNodeType(), + } while (reader.MoveToNextAttribute()); + reader.MoveToElement(); + } + + found.push_back(TItem{ + reader.GetDepth(), + reader.GetNodeType(), TString(reader.GetName()), - JoinSeq(", ", kv), + JoinSeq(", ", kv), reader.HasValue() ? TString(reader.GetValue()) : TString(), - }); - } - + }); + } + const TVector<TItem> expected = { - TItem{0, ENT::Element, "example", "toto: 1", ""}, - TItem{1, ENT::SignificantWhitespace, "#text", "", "\n "}, - TItem{1, ENT::Element, "examplechild", "id: 1", ""}, - TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "}, - TItem{2, ENT::Element, "child_of_child", "", ""}, - TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "}, - TItem{1, ENT::EndElement, "examplechild", "id: 1", ""}, - TItem{1, ENT::SignificantWhitespace, "#text", "", "\n "}, - TItem{1, ENT::Element, "examplechild", "id: 2, toto: 3", ""}, - TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "}, - TItem{2, ENT::Element, "child_of_child", "", ""}, - TItem{3, ENT::Text, "#text", "", "Some content : -)"}, - TItem{2, ENT::EndElement, "child_of_child", "", ""}, - TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "}, - TItem{1, ENT::EndElement, "examplechild", "id: 2, toto: 3", ""}, - TItem{1, ENT::SignificantWhitespace, "#text", "", "\n"}, + TItem{0, ENT::Element, "example", "toto: 1", ""}, + TItem{1, ENT::SignificantWhitespace, "#text", "", "\n "}, + TItem{1, ENT::Element, "examplechild", "id: 1", ""}, + TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "}, + TItem{2, ENT::Element, "child_of_child", "", ""}, + TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "}, + TItem{1, ENT::EndElement, "examplechild", "id: 1", ""}, + TItem{1, ENT::SignificantWhitespace, "#text", "", "\n "}, + TItem{1, ENT::Element, "examplechild", "id: 2, toto: 3", ""}, + TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "}, + TItem{2, ENT::Element, "child_of_child", "", ""}, + TItem{3, ENT::Text, "#text", "", "Some content : -)"}, + TItem{2, ENT::EndElement, "child_of_child", "", ""}, + TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "}, + TItem{1, ENT::EndElement, "examplechild", "id: 2, toto: 3", ""}, + TItem{1, ENT::SignificantWhitespace, "#text", "", "\n"}, TItem{0, ENT::EndElement, "example", "toto: 1", ""}}; - - UNIT_ASSERT_VALUES_EQUAL(found.size(), expected.size()); - - for (size_t i = 0; i < expected.size(); ++i) { - UNIT_ASSERT_VALUES_EQUAL_C(found[i].Depth, expected[i].Depth, "line " << i); - UNIT_ASSERT_EQUAL_C(found[i].Type, expected[i].Type, "line " << i); - UNIT_ASSERT_VALUES_EQUAL_C(found[i].Name, expected[i].Name, "line " << i); - UNIT_ASSERT_VALUES_EQUAL_C(found[i].Attrs, expected[i].Attrs, "line " << i); - UNIT_ASSERT_VALUES_EQUAL_C(found[i].Value, expected[i].Value, "line " << i); - } - } - + + UNIT_ASSERT_VALUES_EQUAL(found.size(), expected.size()); + + for (size_t i = 0; i < expected.size(); ++i) { + UNIT_ASSERT_VALUES_EQUAL_C(found[i].Depth, expected[i].Depth, "line " << i); + UNIT_ASSERT_EQUAL_C(found[i].Type, expected[i].Type, "line " << i); + UNIT_ASSERT_VALUES_EQUAL_C(found[i].Name, expected[i].Name, "line " << i); + UNIT_ASSERT_VALUES_EQUAL_C(found[i].Attrs, expected[i].Attrs, "line " << i); + UNIT_ASSERT_VALUES_EQUAL_C(found[i].Value, expected[i].Value, "line " << i); + } + } + const TString GEODATA = "<?xml version=\"1.0\" encoding=\"utf-8\"?>" "<root>" "" @@ -133,158 +133,158 @@ Y_UNIT_TEST_SUITE(TestXmlTextReader) { " </country>" "" "</root>"; - + Y_UNIT_TEST(ParseXmlSimple) { - struct TCountry { + struct TCountry { TString Name; TVector<TString> Cities; - }; - + }; + THashMap<int, TCountry> data; - - auto handler = [&data](NXml::TConstNode node) { - const int id = node.Attr<int>("id"); - - TCountry& c = data[id]; - + + auto handler = [&data](NXml::TConstNode node) { + const int id = node.Attr<int>("id"); + + TCountry& c = data[id]; + c.Name = node.FirstChild("name").Value<TString>(); - - const NXml::TConstNodes cityNodes = node.Nodes("cities/city"); + + const NXml::TConstNodes cityNodes = node.Nodes("cities/city"); for (auto cityNode : cityNodes) { c.Cities.push_back(cityNode.Value<TString>()); - } - }; - - ParseXml(GEODATA, handler, "country"); - - UNIT_ASSERT_EQUAL(data.size(), 3); - + } + }; + + ParseXml(GEODATA, handler, "country"); + + UNIT_ASSERT_EQUAL(data.size(), 3); + UNIT_ASSERT(data.contains(225)); - const TCountry& russia = data.at(225); - UNIT_ASSERT_EQUAL(russia.Name, "Россия"); - UNIT_ASSERT_EQUAL(russia.Cities.size(), 2); - UNIT_ASSERT_EQUAL(russia.Cities[0], "Москва"); - UNIT_ASSERT_EQUAL(russia.Cities[1], "Санкт-Петербург"); - + const TCountry& russia = data.at(225); + UNIT_ASSERT_EQUAL(russia.Name, "Россия"); + UNIT_ASSERT_EQUAL(russia.Cities.size(), 2); + UNIT_ASSERT_EQUAL(russia.Cities[0], "Москва"); + UNIT_ASSERT_EQUAL(russia.Cities[1], "Санкт-Петербург"); + UNIT_ASSERT(data.contains(149)); - const TCountry& belarus = data.at(149); - UNIT_ASSERT_EQUAL(belarus.Name, "Беларусь"); - UNIT_ASSERT_EQUAL(belarus.Cities.size(), 1); - UNIT_ASSERT_EQUAL(belarus.Cities[0], "Минск"); - + const TCountry& belarus = data.at(149); + UNIT_ASSERT_EQUAL(belarus.Name, "Беларусь"); + UNIT_ASSERT_EQUAL(belarus.Cities.size(), 1); + UNIT_ASSERT_EQUAL(belarus.Cities[0], "Минск"); + UNIT_ASSERT(data.contains(187)); - const TCountry& ukraine = data.at(187); - UNIT_ASSERT_EQUAL(ukraine.Name, "Украина"); - UNIT_ASSERT_EQUAL(ukraine.Cities.size(), 1); - UNIT_ASSERT_EQUAL(ukraine.Cities[0], "Киев"); - } - + const TCountry& ukraine = data.at(187); + UNIT_ASSERT_EQUAL(ukraine.Name, "Украина"); + UNIT_ASSERT_EQUAL(ukraine.Cities.size(), 1); + UNIT_ASSERT_EQUAL(ukraine.Cities[0], "Киев"); + } + Y_UNIT_TEST(ParseXmlDeepLevel) { TVector<TString> cities; - - auto handler = [&cities](NXml::TConstNode node) { + + auto handler = [&cities](NXml::TConstNode node) { cities.push_back(node.Value<TString>()); - }; - - ParseXml(GEODATA, handler, "city"); - - UNIT_ASSERT_EQUAL(cities.size(), 4); - UNIT_ASSERT_EQUAL(cities[0], "Москва"); - UNIT_ASSERT_EQUAL(cities[1], "Санкт-Петербург"); - UNIT_ASSERT_EQUAL(cities[2], "Минск"); - UNIT_ASSERT_EQUAL(cities[3], "Киев"); - } - + }; + + ParseXml(GEODATA, handler, "city"); + + UNIT_ASSERT_EQUAL(cities.size(), 4); + UNIT_ASSERT_EQUAL(cities[0], "Москва"); + UNIT_ASSERT_EQUAL(cities[1], "Санкт-Петербург"); + UNIT_ASSERT_EQUAL(cities[2], "Минск"); + UNIT_ASSERT_EQUAL(cities[3], "Киев"); + } + Y_UNIT_TEST(ParseXmlException) { - // Check that exception properly passes through plain C code of libxml, - // no leaks are detected by valgrind. - auto handler = [](NXml::TConstNode node) { - const int id = node.Attr<int>("id"); - if (id != 225) { - ythrow yexception() << "unsupported id: " << id; - } - }; - - UNIT_ASSERT_EXCEPTION(ParseXml(GEODATA, handler, "country"), yexception); - UNIT_ASSERT_EXCEPTION(ParseXml("<a></b>", handler, "a"), yexception); - UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2\"></b></root>", handler, "a"), yexception); - UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2></a></root>", handler, "a"), yexception); - } - + // Check that exception properly passes through plain C code of libxml, + // no leaks are detected by valgrind. + auto handler = [](NXml::TConstNode node) { + const int id = node.Attr<int>("id"); + if (id != 225) { + ythrow yexception() << "unsupported id: " << id; + } + }; + + UNIT_ASSERT_EXCEPTION(ParseXml(GEODATA, handler, "country"), yexception); + UNIT_ASSERT_EXCEPTION(ParseXml("<a></b>", handler, "a"), yexception); + UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2\"></b></root>", handler, "a"), yexception); + UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2></a></root>", handler, "a"), yexception); + } + const TString BACKA = // UTF-8 encoding is used implicitly - "<Companies" - " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"" - " xmlns=\"http://maps.yandex.ru/backa/1.x\"" - " xmlns:atom=\"http://www.w3.org/2005/Atom\"" - " xmlns:biz=\"http://maps.yandex.ru/business/1.x\"" - " xmlns:xal=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\"" - " xmlns:gml=\"http://www.opengis.net/gml\"" - ">" - "" - " <Company id=\"0001\">" - " <Geo>" - " <Location>" - " <gml:pos>37.62669 55.664827</gml:pos>" - " <kind>house</kind>" - " </Location>" - " <AddressDetails xmlns=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\">" - " <Country>" - " <AddressLine xml:lang=\"ru\">Москва, Каширское ш., 14</AddressLine>" - " </Country>" - " </AddressDetails>" - " </Geo>" - " </Company>" - "" - " <Company id=\"0002\">" - " <Geo>" - " <Location>" - " <pos xmlns=\"http://www.opengis.net/gml\">150.819797 59.56092</pos>" - " <kind>locality</kind>" - " </Location>" - " <xal:AddressDetails>" - " <xal:Country>" - " <xal:AddressLine xml:lang=\"ru\">Магадан, ул. Пролетарская, 43</xal:AddressLine>" - " </xal:Country>" - " </xal:AddressDetails>" - " </Geo>" - " </Company>" - "" - "</Companies>"; - + "<Companies" + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"" + " xmlns=\"http://maps.yandex.ru/backa/1.x\"" + " xmlns:atom=\"http://www.w3.org/2005/Atom\"" + " xmlns:biz=\"http://maps.yandex.ru/business/1.x\"" + " xmlns:xal=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\"" + " xmlns:gml=\"http://www.opengis.net/gml\"" + ">" + "" + " <Company id=\"0001\">" + " <Geo>" + " <Location>" + " <gml:pos>37.62669 55.664827</gml:pos>" + " <kind>house</kind>" + " </Location>" + " <AddressDetails xmlns=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\">" + " <Country>" + " <AddressLine xml:lang=\"ru\">Москва, Каширское ш., 14</AddressLine>" + " </Country>" + " </AddressDetails>" + " </Geo>" + " </Company>" + "" + " <Company id=\"0002\">" + " <Geo>" + " <Location>" + " <pos xmlns=\"http://www.opengis.net/gml\">150.819797 59.56092</pos>" + " <kind>locality</kind>" + " </Location>" + " <xal:AddressDetails>" + " <xal:Country>" + " <xal:AddressLine xml:lang=\"ru\">Магадан, ул. Пролетарская, 43</xal:AddressLine>" + " </xal:Country>" + " </xal:AddressDetails>" + " </Geo>" + " </Company>" + "" + "</Companies>"; + Y_UNIT_TEST(NamespaceHell) { - using TNS = NXml::TNamespaceForXPath; + using TNS = NXml::TNamespaceForXPath; const NXml::TNamespacesForXPath ns = { - TNS{"b", "http://maps.yandex.ru/backa/1.x"}, - TNS{"gml", "http://www.opengis.net/gml"}, + TNS{"b", "http://maps.yandex.ru/backa/1.x"}, + TNS{"gml", "http://www.opengis.net/gml"}, TNS{"xal", "urn:oasis:names:tc:ciq:xsdschema:xAL:2.0"}}; - - int count = 0; + + int count = 0; THashMap<TString, TString> positions; THashMap<TString, TString> addresses; - - auto handler = [&](NXml::TConstNode node) { - count++; + + auto handler = [&](NXml::TConstNode node) { + count++; const auto id = node.Attr<TString>("id"); - - NXml::TXPathContextPtr ctxt = node.CreateXPathContext(ns); - - const NXml::TConstNode location = node.Node("b:Geo/b:Location", false, *ctxt); + + NXml::TXPathContextPtr ctxt = node.CreateXPathContext(ns); + + const NXml::TConstNode location = node.Node("b:Geo/b:Location", false, *ctxt); positions[id] = location.Node("gml:pos", false, *ctxt).Value<TString>(); addresses[id] = node.Node("b:Geo/xal:AddressDetails/xal:Country/xal:AddressLine", false, *ctxt).Value<TString>(); - }; - - ParseXml(BACKA, handler, "Company"); - UNIT_ASSERT_EQUAL(count, 0); - // nothing found because namespace was not specified - - ParseXml(BACKA, handler, "Company", "http://maps.yandex.ru/backa/1.x"); - - UNIT_ASSERT_VALUES_EQUAL(count, 2); - - UNIT_ASSERT_VALUES_EQUAL(positions["0001"], "37.62669 55.664827"); - UNIT_ASSERT_VALUES_EQUAL(positions["0002"], "150.819797 59.56092"); - - UNIT_ASSERT_VALUES_EQUAL(addresses["0001"], "Москва, Каширское ш., 14"); - UNIT_ASSERT_VALUES_EQUAL(addresses["0002"], "Магадан, ул. Пролетарская, 43"); - } -} + }; + + ParseXml(BACKA, handler, "Company"); + UNIT_ASSERT_EQUAL(count, 0); + // nothing found because namespace was not specified + + ParseXml(BACKA, handler, "Company", "http://maps.yandex.ru/backa/1.x"); + + UNIT_ASSERT_VALUES_EQUAL(count, 2); + + UNIT_ASSERT_VALUES_EQUAL(positions["0001"], "37.62669 55.664827"); + UNIT_ASSERT_VALUES_EQUAL(positions["0002"], "150.819797 59.56092"); + + UNIT_ASSERT_VALUES_EQUAL(addresses["0001"], "Москва, Каширское ш., 14"); + UNIT_ASSERT_VALUES_EQUAL(addresses["0002"], "Магадан, ул. Пролетарская, 43"); + } +} diff --git a/library/cpp/xml/document/ya.make b/library/cpp/xml/document/ya.make index 86bbd639cf..06a0065972 100644 --- a/library/cpp/xml/document/ya.make +++ b/library/cpp/xml/document/ya.make @@ -4,7 +4,7 @@ OWNER(finder) SRCS( xml-document.cpp - xml-textreader.cpp + xml-textreader.cpp xml-options.cpp ) |