about summary refs log tree commit diff stats
path: root/library/cpp/xml/document
diff options
context:
space:
mode:
author sobols <sobols@yandex-team.ru> 2022-02-10 16:47:08 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:47:08 +0300
commit 03335cb18337a0ef51966452a66a69b01abea218 (patch)
tree b83306b6e37edeea782e9eed673d89286c4fef35 /library/cpp/xml/document
parent 09961b69c61f471ddd594e0fd877df62a8021562 (diff)
download ydb-03335cb18337a0ef51966452a66a69b01abea218.tar.gz
Restoring authorship annotation for <sobols@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/xml/document')
-rw-r--r-- library/cpp/xml/document/libxml-guards.h 2
-rw-r--r-- library/cpp/xml/document/ut/ya.make 2
-rw-r--r-- library/cpp/xml/document/xml-document-decl.h 202
-rw-r--r-- library/cpp/xml/document/xml-document.cpp 120
-rw-r--r-- library/cpp/xml/document/xml-document_ut.cpp 172
-rw-r--r-- library/cpp/xml/document/xml-textreader.cpp 572
-rw-r--r-- library/cpp/xml/document/xml-textreader.h 612
-rw-r--r-- library/cpp/xml/document/xml-textreader_ut.cpp 422
-rw-r--r-- library/cpp/xml/document/ya.make 2
9 files changed, 1053 insertions, 1053 deletions
diff --git a/library/cpp/xml/document/libxml-guards.h b/library/cpp/xml/document/libxml-guards.h
index 479ce285e5..4188cecff1 100644
--- a/library/cpp/xml/document/libxml-guards.h
+++ b/library/cpp/xml/document/libxml-guards.h
@@ -40,7 +40,7 @@ namespace NXml {
typedef TxmlXPathObjectPtr TXPathObjectPtr;
typedef TAutoPtr<char, NDetail::TSignedCharPtrTraits> TSignedCharPtr;
typedef TAutoPtr<xmlChar, NDetail::TCharPtrTraits> TCharPtr;
- typedef TxmlDocHolder TDocHolder;
+ typedef TxmlDocHolder TDocHolder;
typedef TxmlURIPtr TURIPtr;
typedef TxmlNodePtr TNodePtr;
typedef TAutoPtr<xmlOutputBuffer, NDetail::TOutputBufferPtrTraits> TOutputBufferPtr;
diff --git a/library/cpp/xml/document/ut/ya.make b/library/cpp/xml/document/ut/ya.make
index 9a7213baa0..e955448c66 100644
--- a/library/cpp/xml/document/ut/ya.make
+++ b/library/cpp/xml/document/ut/ya.make
@@ -4,7 +4,7 @@ OWNER(finder)
SRCS(
xml-document_ut.cpp
- xml-textreader_ut.cpp
+ xml-textreader_ut.cpp
xml-options_ut.cpp
)
diff --git a/library/cpp/xml/document/xml-document-decl.h b/library/cpp/xml/document/xml-document-decl.h
index cde87a311b..bfda1fb7e6 100644
--- a/library/cpp/xml/document/xml-document-decl.h
+++ b/library/cpp/xml/document/xml-document-decl.h
@@ -15,8 +15,8 @@ namespace NXml {
class TConstNodes;
class TConstNode;
- using TXPathContext = xmlXPathContext;
-
+ using TXPathContext = xmlXPathContext;
+
class TDocument {
public:
enum Source {
@@ -35,11 +35,11 @@ namespace NXml {
TDocument(const TString& source, Source type = File);
public:
- TDocument(const TDocument& that) = delete;
- TDocument& operator=(const TDocument& that) = delete;
+ TDocument(const TDocument& that) = delete;
+ TDocument& operator=(const TDocument& that) = delete;
- TDocument(TDocument&& that);
- TDocument& operator=(TDocument&& that);
+ TDocument(TDocument&& that);
+ TDocument& operator=(TDocument&& that);
/**
* get root element
@@ -74,12 +74,12 @@ namespace NXml {
void ParseFile(const TString& file);
void ParseString(TZtStringBuf xml);
- TDocument(TDocHolder doc)
- : Doc(std::move(doc))
+ TDocument(TDocHolder doc)
+ : Doc(std::move(doc))
{
}
- TDocHolder Doc;
+ TDocHolder Doc;
};
struct TNamespaceForXPath {
@@ -150,7 +150,7 @@ namespace NXml {
friend class TConstNode;
friend class TNode;
- TConstNodes(xmlDoc* doc, TXPathObjectPtr obj);
+ TConstNodes(xmlDoc* doc, TXPathObjectPtr obj);
size_t SizeValue;
xmlDoc* Doc;
@@ -161,7 +161,7 @@ namespace NXml {
public:
friend class TDocument;
friend class TConstNode;
- friend class TTextReader;
+ friend class TTextReader;
/**
* check if node is null
@@ -174,18 +174,18 @@ namespace NXml {
bool IsElementNode() const;
/**
- * Create xpath context to be used later for fast xpath evaluation.
- * @param nss: explicitly specify XML namespaces to use and their prefixes
- *
- * For better performance, when you need to evaluate several xpath expressions,
- * it makes sense to create a context, load namespace prefixes once
- * and use the context several times in Node(), Nodes(), XPath() function calls for several nodes.
- * The context may be used with any node of the current document, but
- * cannot be shared between different XML documents.
- */
- TXPathContextPtr CreateXPathContext(const TNamespacesForXPath& nss = TNamespacesForXPath()) const;
-
- /**
+ * Create xpath context to be used later for fast xpath evaluation.
+ * @param nss: explicitly specify XML namespaces to use and their prefixes
+ *
+ * For better performance, when you need to evaluate several xpath expressions,
+ * it makes sense to create a context, load namespace prefixes once
+ * and use the context several times in Node(), Nodes(), XPath() function calls for several nodes.
+ * The context may be used with any node of the current document, but
+ * cannot be shared between different XML documents.
+ */
+ TXPathContextPtr CreateXPathContext(const TNamespacesForXPath& nss = TNamespacesForXPath()) const;
+
+ /**
* get all element nodes matching given xpath expression
* @param xpath: xpath expression
* @param quiet: don't throw exception if zero nodes found
@@ -197,17 +197,17 @@ namespace NXml {
TConstNodes Nodes(TZtStringBuf xpath, bool quiet = false, const TNamespacesForXPath& ns = TNamespacesForXPath()) const;
/**
- * get all element nodes matching given xpath expression
- * @param xpath: xpath expression
- * @param quiet: don't throw exception if zero nodes found
- * @param ctxt: reusable xpath context
- *
- * For historical reasons, this only works for *element* nodes.
- * Use the XPath function if you need other kinds of nodes.
- */
+ * get all element nodes matching given xpath expression
+ * @param xpath: xpath expression
+ * @param quiet: don't throw exception if zero nodes found
+ * @param ctxt: reusable xpath context
+ *
+ * For historical reasons, this only works for *element* nodes.
+ * Use the XPath function if you need other kinds of nodes.
+ */
TConstNodes Nodes(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const;
-
- /**
+
+ /**
* get all nodes matching given xpath expression
* @param xpath: xpath expression
* @param quiet: don't throw exception if zero nodes found
@@ -216,18 +216,18 @@ namespace NXml {
TConstNodes XPath(TZtStringBuf xpath, bool quiet = false, const TNamespacesForXPath& ns = TNamespacesForXPath()) const;
/**
- * get all nodes matching given xpath expression
- * @param xpath: xpath expression
- * @param quiet: don't throw exception if zero nodes found
- * @param ctxt: reusable xpath context
- */
+ * get all nodes matching given xpath expression
+ * @param xpath: xpath expression
+ * @param quiet: don't throw exception if zero nodes found
+ * @param ctxt: reusable xpath context
+ */
TConstNodes XPath(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const;
-
- /**
+
+ /**
* get the first element node matching given xpath expression
* @param xpath: path to node (from current node)
* @param quiet: don't throw exception if node not found,
- * return null node (@see IsNull())
+ * return null node (@see IsNull())
* @param ns: explicitly specify XML namespaces to use and their prefixes
*
* For historical reasons, this only works for *element* nodes.
@@ -238,23 +238,23 @@ namespace NXml {
TConstNode Node(TZtStringBuf xpath, bool quiet = false, const TNamespacesForXPath& ns = TNamespacesForXPath()) const;
/**
- * get the first element node matching given xpath expression
- * @param xpath: path to node (from current node)
- * @param quiet: don't throw exception if node not found,
- * return null node (@see IsNull())
- * @param ctxt: reusable xpath context
- *
- * For historical reasons, this only works for *element* nodes.
- * Use the XPath function if you need other kinds of nodes.
- */
+ * get the first element node matching given xpath expression
+ * @param xpath: path to node (from current node)
+ * @param quiet: don't throw exception if node not found,
+ * return null node (@see IsNull())
+ * @param ctxt: reusable xpath context
+ *
+ * For historical reasons, this only works for *element* nodes.
+ * Use the XPath function if you need other kinds of nodes.
+ */
TNode Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt);
TConstNode Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const;
-
- /**
+
+ /**
* get node first child
* @param name: child name
- * @note if name is empty, returns the first child node of type "element"
- * @note returns null node if no child found
+ * @note if name is empty, returns the first child node of type "element"
+ * @note returns null node if no child found
*/
TNode FirstChild(TZtStringBuf name);
TConstNode FirstChild(TZtStringBuf name) const;
@@ -272,8 +272,8 @@ namespace NXml {
/**
* get node neighbour
* @param name: neighbour name
- * @note if name is empty, returns the next sibling node of type "element"
- * @note returns null node if no neighbour found
+ * @note if name is empty, returns the next sibling node of type "element"
+ * @note returns null node if no neighbour found
*/
TNode NextSibling(TZtStringBuf name);
TConstNode NextSibling(TZtStringBuf name) const;
@@ -495,18 +495,18 @@ namespace NXml {
}
/**
- * Create xpath context to be used later for fast xpath evaluation.
- * @param nss: explicitly specify XML namespaces to use and their prefixes
- */
+ * Create xpath context to be used later for fast xpath evaluation.
+ * @param nss: explicitly specify XML namespaces to use and their prefixes
+ */
TXPathContextPtr CreateXPathContext(const TNamespacesForXPath& nss = TNamespacesForXPath()) const {
- return ActualNode.CreateXPathContext(nss);
- }
-
- /**
+ return ActualNode.CreateXPathContext(nss);
+ }
+
+ /**
* get all element nodes matching given xpath expression
* @param xpath: xpath expression
* @param quiet: don't throw exception if zero nodes found
- * @param ns: explicitly specify XML namespaces to use and their prefixes
+ * @param ns: explicitly specify XML namespaces to use and their prefixes
*
* For historical reasons, this only works for *element* nodes.
* Use the XPath function if you need other kinds of nodes.
@@ -516,44 +516,44 @@ namespace NXml {
}
/**
- * get all element nodes matching given xpath expression
- * @param xpath: xpath expression
- * @param quiet: don't throw exception if zero nodes found
- * @param ctxt: reusable xpath context
- *
- * For historical reasons, this only works for *element* nodes.
- * Use the XPath function if you need other kinds of nodes.
- */
+ * get all element nodes matching given xpath expression
+ * @param xpath: xpath expression
+ * @param quiet: don't throw exception if zero nodes found
+ * @param ctxt: reusable xpath context
+ *
+ * For historical reasons, this only works for *element* nodes.
+ * Use the XPath function if you need other kinds of nodes.
+ */
TConstNodes Nodes(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const {
- return ActualNode.Nodes(xpath, quiet, ctxt);
- }
-
- /**
+ return ActualNode.Nodes(xpath, quiet, ctxt);
+ }
+
+ /**
* get all nodes matching given xpath expression
* @param xpath: xpath expression
* @param quiet: don't throw exception if zero nodes found
- * @param ns: explicitly specify XML namespaces to use and their prefixes
+ * @param ns: explicitly specify XML namespaces to use and their prefixes
*/
TConstNodes XPath(TZtStringBuf xpath, bool quiet = false, const TNamespacesForXPath& ns = TNamespacesForXPath()) const {
return ActualNode.XPath(xpath, quiet, ns);
}
/**
- * get all nodes matching given xpath expression
- * @param xpath: xpath expression
- * @param quiet: don't throw exception if zero nodes found
- * @param ctxt: reusable xpath context
- */
+ * get all nodes matching given xpath expression
+ * @param xpath: xpath expression
+ * @param quiet: don't throw exception if zero nodes found
+ * @param ctxt: reusable xpath context
+ */
TConstNodes XPath(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const {
- return ActualNode.XPath(xpath, quiet, ctxt);
- }
-
- /**
+ return ActualNode.XPath(xpath, quiet, ctxt);
+ }
+
+ /**
* get the first element node matching given xpath expression
* @param xpath: path to node (from current node)
* @param quiet: don't throw exception if node not found,
- * return null node (@see IsNull())
- * @param ns: explicitly specify XML namespaces to use and their prefixes
+ * return null node (@see IsNull())
+ * @param ns: explicitly specify XML namespaces to use and their prefixes
*
* For historical reasons, this only works for *element* nodes.
* Use the XPath function if you need other kinds of nodes.
@@ -562,20 +562,20 @@ namespace NXml {
return ActualNode.Node(xpath, quiet, ns);
}
- /**
- * get the first element node matching given xpath expression
- * @param xpath: path to node (from current node)
- * @param quiet: don't throw exception if node not found,
- * return null node (@see IsNull())
- * @param ctxt: reusable xpath context
- *
- * For historical reasons, this only works for *element* nodes.
- * Use the XPath function if you need other kinds of nodes.
- */
+ /**
+ * get the first element node matching given xpath expression
+ * @param xpath: path to node (from current node)
+ * @param quiet: don't throw exception if node not found,
+ * return null node (@see IsNull())
+ * @param ctxt: reusable xpath context
+ *
+ * For historical reasons, this only works for *element* nodes.
+ * Use the XPath function if you need other kinds of nodes.
+ */
TConstNode Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const {
- return ActualNode.Node(xpath, quiet, ctxt);
- }
-
+ return ActualNode.Node(xpath, quiet, ctxt);
+ }
+
TConstNode FirstChild(TZtStringBuf name) const {
return ActualNode.FirstChild(name);
}
diff --git a/library/cpp/xml/document/xml-document.cpp b/library/cpp/xml/document/xml-document.cpp
index 6907217a14..18a554d732 100644
--- a/library/cpp/xml/document/xml-document.cpp
+++ b/library/cpp/xml/document/xml-document.cpp
@@ -26,7 +26,7 @@ namespace NXml {
ParseString(xml);
break;
case RootName: {
- TDocHolder doc(xmlNewDoc(XMLCHAR("1.0")));
+ TDocHolder doc(xmlNewDoc(XMLCHAR("1.0")));
if (!doc)
THROW(XmlException, "Can't create xml document.");
doc->encoding = xmlStrdup(XMLCHAR("utf-8"));
@@ -36,21 +36,21 @@ namespace NXml {
THROW(XmlException, "Can't create root node.");
xmlDocSetRootElement(doc.Get(), node.Get());
Y_UNUSED(node.Release());
- Doc = std::move(doc);
+ Doc = std::move(doc);
} break;
default:
THROW(InvalidArgument, "Wrong source type");
}
}
- TDocument::TDocument(TDocument&& doc)
- : Doc(std::move(doc.Doc))
+ TDocument::TDocument(TDocument&& doc)
+ : Doc(std::move(doc.Doc))
{
}
- TDocument& TDocument::operator=(TDocument&& doc) {
+ TDocument& TDocument::operator=(TDocument&& doc) {
if (this != &doc)
- doc.Swap(*this);
+ doc.Swap(*this);
return *this;
}
@@ -63,7 +63,7 @@ namespace NXml {
if (!pctx)
THROW(XmlException, "Can't create parser context");
- TDocHolder doc(xmlCtxtReadFile(pctx.Get(), file.c_str(), nullptr, XML_PARSE_NOCDATA));
+ TDocHolder doc(xmlCtxtReadFile(pctx.Get(), file.c_str(), nullptr, XML_PARSE_NOCDATA));
if (!doc)
THROW(XmlException, "Can't parse file " << file);
@@ -72,7 +72,7 @@ namespace NXml {
if (res == -1)
THROW(XmlException, "XIncludes processing failed");
- Doc = std::move(doc);
+ Doc = std::move(doc);
}
void TDocument::ParseString(TZtStringBuf xml) {
@@ -80,12 +80,12 @@ namespace NXml {
if (pctx.Get() == nullptr)
THROW(XmlException, "Can't create parser context");
- TDocHolder doc(xmlCtxtReadMemory(pctx.Get(), xml.c_str(), (int)xml.size(), nullptr, nullptr, XML_PARSE_NOCDATA));
+ TDocHolder doc(xmlCtxtReadMemory(pctx.Get(), xml.c_str(), (int)xml.size(), nullptr, nullptr, XML_PARSE_NOCDATA));
if (!doc)
THROW(XmlException, "Can't parse string");
- Doc = std::move(doc);
+ Doc = std::move(doc);
}
TNode TDocument::Root() {
@@ -113,34 +113,34 @@ namespace NXml {
}
TXPathContextPtr TNode::CreateXPathContext(const TNamespacesForXPath& nss) const {
- TXPathContextPtr ctx = xmlXPathNewContext(DocPointer);
- if (!ctx)
- THROW(XmlException, "Can't create empty xpath context");
-
- for (const auto& ns : nss) {
- const int r = xmlXPathRegisterNs(ctx.Get(), XMLCHAR(ns.Prefix.c_str()), XMLCHAR(ns.Url.c_str()));
- if (r != 0)
- THROW(XmlException, "Can't register namespace " << ns.Url << " with prefix " << ns.Prefix);
- }
-
- return ctx;
- }
-
+ TXPathContextPtr ctx = xmlXPathNewContext(DocPointer);
+ if (!ctx)
+ THROW(XmlException, "Can't create empty xpath context");
+
+ for (const auto& ns : nss) {
+ const int r = xmlXPathRegisterNs(ctx.Get(), XMLCHAR(ns.Prefix.c_str()), XMLCHAR(ns.Url.c_str()));
+ if (r != 0)
+ THROW(XmlException, "Can't register namespace " << ns.Url << " with prefix " << ns.Prefix);
+ }
+
+ return ctx;
+ }
+
TConstNodes TNode::XPath(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) const {
- TXPathContextPtr ctxt = CreateXPathContext(ns);
- return XPath(xpath, quiet, *ctxt);
- }
+ TXPathContextPtr ctxt = CreateXPathContext(ns);
+ return XPath(xpath, quiet, *ctxt);
+ }
TConstNodes TNode::XPath(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const {
- if (xmlXPathSetContextNode(NodePointer, &ctxt) != 0)
- THROW(XmlException, "Can't set xpath context node, probably the context is associated with another document");
-
- TXPathObjectPtr obj = xmlXPathEvalExpression(XMLCHAR(xpath.c_str()), &ctxt);
- if (!obj)
- THROW(XmlException, "Can't evaluate xpath expression " << xpath);
-
- TConstNodes nodes(DocPointer, obj);
-
+ if (xmlXPathSetContextNode(NodePointer, &ctxt) != 0)
+ THROW(XmlException, "Can't set xpath context node, probably the context is associated with another document");
+
+ TXPathObjectPtr obj = xmlXPathEvalExpression(XMLCHAR(xpath.c_str()), &ctxt);
+ if (!obj)
+ THROW(XmlException, "Can't evaluate xpath expression " << xpath);
+
+ TConstNodes nodes(DocPointer, obj);
+
if (nodes.Size() == 0 && !quiet)
THROW(NodeNotFound, xpath);
@@ -148,30 +148,30 @@ namespace NXml {
}
TConstNodes TNode::Nodes(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) const {
- TXPathContextPtr ctxt = CreateXPathContext(ns);
- return Nodes(xpath, quiet, *ctxt);
- }
-
+ TXPathContextPtr ctxt = CreateXPathContext(ns);
+ return Nodes(xpath, quiet, *ctxt);
+ }
+
TConstNodes TNode::Nodes(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const {
- TConstNodes nodes = XPath(xpath, quiet, ctxt);
+ TConstNodes nodes = XPath(xpath, quiet, ctxt);
if (nodes.Size() != 0 && !nodes[0].IsElementNode())
THROW(XmlException, "xpath points to non-element nodes: " << xpath);
return nodes;
}
TNode TNode::Node(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) {
- TXPathContextPtr ctxt = CreateXPathContext(ns);
- return Node(xpath, quiet, *ctxt);
- }
+ TXPathContextPtr ctxt = CreateXPathContext(ns);
+ return Node(xpath, quiet, *ctxt);
+ }
TConstNode TNode::Node(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) const {
- TXPathContextPtr ctxt = CreateXPathContext(ns);
- return Node(xpath, quiet, *ctxt);
- }
-
+ TXPathContextPtr ctxt = CreateXPathContext(ns);
+ return Node(xpath, quiet, *ctxt);
+ }
+
TNode TNode::Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) {
- TConstNodes n = Nodes(xpath, quiet, ctxt);
-
+ TConstNodes n = Nodes(xpath, quiet, ctxt);
+
if (n.Size() == 0 && !quiet)
THROW(NodeNotFound, xpath);
@@ -182,7 +182,7 @@ namespace NXml {
}
TConstNode TNode::Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const {
- return const_cast<TNode*>(this)->Node(xpath, quiet, ctxt);
+ return const_cast<TNode*>(this)->Node(xpath, quiet, ctxt);
}
TNode TNode::FirstChild(TZtStringBuf name) {
@@ -271,9 +271,9 @@ namespace NXml {
}
TString TNode::Name() const {
- if (IsNull())
- THROW(XmlException, "Node is null");
-
+ if (IsNull())
+ THROW(XmlException, "Node is null");
+
return CAST2CHAR(NodePointer->name);
}
@@ -294,9 +294,9 @@ namespace NXml {
}
bool TNode::IsText() const {
- if (IsNull())
- THROW(XmlException, "Node is null");
-
+ if (IsNull())
+ THROW(XmlException, "Node is null");
+
return NodePointer->type == XML_TEXT_NODE;
}
@@ -368,10 +368,10 @@ namespace NXml {
return TConstNodesRef(*this);
}
- TConstNodes::TConstNodes(xmlDoc* doc, TXPathObjectPtr obj)
- : SizeValue(obj && obj->nodesetval ? obj->nodesetval->nodeNr : 0)
- , Doc(doc)
- , Obj(obj)
+ TConstNodes::TConstNodes(xmlDoc* doc, TXPathObjectPtr obj)
+ : SizeValue(obj && obj->nodesetval ? obj->nodesetval->nodeNr : 0)
+ , Doc(doc)
+ , Obj(obj)
{
}
diff --git a/library/cpp/xml/document/xml-document_ut.cpp b/library/cpp/xml/document/xml-document_ut.cpp
index 8361e3c503..9f537b75c4 100644
--- a/library/cpp/xml/document/xml-document_ut.cpp
+++ b/library/cpp/xml/document/xml-document_ut.cpp
@@ -84,11 +84,11 @@ Y_UNIT_TEST_SUITE(TestXmlDocument) {
TConstNode text = root.Node("h:text", false, nss);
UNIT_ASSERT_EQUAL(text.Value<TString>(), "Некоторый текст");
-
- // For performance you can create xpath context once using nss and pass it.
- TXPathContextPtr ctxt = root.CreateXPathContext(nss);
- UNIT_ASSERT(root.Node("text", true, *ctxt).IsNull());
- UNIT_ASSERT_EXCEPTION(root.Node("text", false, *ctxt), yexception);
+
+ // For performance you can create xpath context once using nss and pass it.
+ TXPathContextPtr ctxt = root.CreateXPathContext(nss);
+ UNIT_ASSERT(root.Node("text", true, *ctxt).IsNull());
+ UNIT_ASSERT_EXCEPTION(root.Node("text", false, *ctxt), yexception);
UNIT_ASSERT_EQUAL(root.Node("h:text", false, *ctxt).Value<TString>(), "Некоторый текст");
}
Y_UNIT_TEST(XmlNodes) {
@@ -119,37 +119,37 @@ Y_UNIT_TEST_SUITE(TestXmlDocument) {
iterLog << node2.Name() << ';';
}
UNIT_ASSERT_STRINGS_EQUAL(iterLog.Str(), "a;c;");
-
- // get only element nodes, ignore text nodes with empty "name" param
+
+ // get only element nodes, ignore text nodes with empty "name" param
node = root.FirstChild(TString());
- UNIT_ASSERT_EQUAL(node.IsText(), false);
- UNIT_ASSERT_EQUAL(node.Name(), "a");
+ UNIT_ASSERT_EQUAL(node.IsText(), false);
+ UNIT_ASSERT_EQUAL(node.Name(), "a");
node = node.NextSibling(TString());
- UNIT_ASSERT_EQUAL(node.IsText(), false);
- UNIT_ASSERT_EQUAL(node.Name(), "c");
-
- // use exact "name" to retrieve children and siblings
- node = root.FirstChild("a");
- UNIT_ASSERT_EQUAL(node.IsNull(), false);
- UNIT_ASSERT_EQUAL(node.Name(), "a");
- node = node.NextSibling("c");
- UNIT_ASSERT_EQUAL(node.IsNull(), false);
- UNIT_ASSERT_EQUAL(node.Name(), "c");
- node = root.FirstChild("c"); // skip "a"
- UNIT_ASSERT_EQUAL(node.IsNull(), false);
- UNIT_ASSERT_EQUAL(node.Name(), "c");
-
- // node not found: no exceptions, null nodes are returned
- node = root.FirstChild("b"); // b is not direct child of root
- UNIT_ASSERT_EQUAL(node.IsNull(), true);
- node = root.FirstChild("nosuchnode");
- UNIT_ASSERT_EQUAL(node.IsNull(), true);
- node = root.FirstChild();
- node = root.NextSibling("unknownnode");
- UNIT_ASSERT_EQUAL(node.IsNull(), true);
- UNIT_ASSERT_EXCEPTION(node.Name(), yexception);
+ UNIT_ASSERT_EQUAL(node.IsText(), false);
+ UNIT_ASSERT_EQUAL(node.Name(), "c");
+
+ // use exact "name" to retrieve children and siblings
+ node = root.FirstChild("a");
+ UNIT_ASSERT_EQUAL(node.IsNull(), false);
+ UNIT_ASSERT_EQUAL(node.Name(), "a");
+ node = node.NextSibling("c");
+ UNIT_ASSERT_EQUAL(node.IsNull(), false);
+ UNIT_ASSERT_EQUAL(node.Name(), "c");
+ node = root.FirstChild("c"); // skip "a"
+ UNIT_ASSERT_EQUAL(node.IsNull(), false);
+ UNIT_ASSERT_EQUAL(node.Name(), "c");
+
+ // node not found: no exceptions, null nodes are returned
+ node = root.FirstChild("b"); // b is not direct child of root
+ UNIT_ASSERT_EQUAL(node.IsNull(), true);
+ node = root.FirstChild("nosuchnode");
+ UNIT_ASSERT_EQUAL(node.IsNull(), true);
+ node = root.FirstChild();
+ node = root.NextSibling("unknownnode");
+ UNIT_ASSERT_EQUAL(node.IsNull(), true);
+ UNIT_ASSERT_EXCEPTION(node.Name(), yexception);
UNIT_ASSERT_EXCEPTION(node.Value<TString>(), yexception);
- UNIT_ASSERT_EXCEPTION(node.IsText(), yexception);
+ UNIT_ASSERT_EXCEPTION(node.IsText(), yexception);
}
Y_UNIT_TEST(DefVal) {
using namespace NXml;
@@ -235,49 +235,49 @@ Y_UNIT_TEST_SUITE(TestXmlDocument) {
UNIT_ASSERT_VALUES_EQUAL(n.ToString(), "<a><b len=\"15\" correct=\"1\">hello world</b></a>");
}
}
-
+
Y_UNIT_TEST(ReuseXPathContext) {
- using namespace NXml;
-
- TDocument xml(
- "<?xml version=\"1.0\"?>\n"
- "<root>\n"
- "<a><b><c>Hello, world!</c></b></a>\n"
- "<text x=\"10\">First</text>\n"
- "<text y=\"20\">Second</text>\n"
- "</root>",
- TDocument::String);
-
- TXPathContextPtr rootCtxt = xml.Root().CreateXPathContext();
-
- // Check Node()
- TConstNode b = xml.Root().Node("a/b", false, *rootCtxt);
-
- // We can use root node context for xpath evaluation in any node
- TConstNode c1 = b.Node("c", false, *rootCtxt);
+ using namespace NXml;
+
+ TDocument xml(
+ "<?xml version=\"1.0\"?>\n"
+ "<root>\n"
+ "<a><b><c>Hello, world!</c></b></a>\n"
+ "<text x=\"10\">First</text>\n"
+ "<text y=\"20\">Second</text>\n"
+ "</root>",
+ TDocument::String);
+
+ TXPathContextPtr rootCtxt = xml.Root().CreateXPathContext();
+
+ // Check Node()
+ TConstNode b = xml.Root().Node("a/b", false, *rootCtxt);
+
+ // We can use root node context for xpath evaluation in any node
+ TConstNode c1 = b.Node("c", false, *rootCtxt);
UNIT_ASSERT_EQUAL(c1.Value<TString>(), "Hello, world!");
-
- TXPathContextPtr bCtxt = b.CreateXPathContext();
- TConstNode c2 = b.Node("c", false, *bCtxt);
+
+ TXPathContextPtr bCtxt = b.CreateXPathContext();
+ TConstNode c2 = b.Node("c", false, *bCtxt);
UNIT_ASSERT_EQUAL(c2.Value<TString>(), "Hello, world!");
-
- // Mixing contexts from different documents is forbidden
- TDocument otherXml("<root></root>", TDocument::String);
- TXPathContextPtr otherCtxt = otherXml.Root().CreateXPathContext();
- UNIT_ASSERT_EXCEPTION(b.Node("c", false, *otherCtxt), yexception);
-
- // Check Nodes()
- TConstNodes texts = xml.Root().Nodes("text", true, *rootCtxt);
- UNIT_ASSERT_EQUAL(texts.Size(), 2);
-
- // Nodes() does't work for non-element nodes
- UNIT_ASSERT_EXCEPTION(xml.Root().Nodes("text/@x", true, *rootCtxt), yexception);
-
- // Check XPath()
- TConstNodes ys = xml.Root().XPath("text/@y", true, *rootCtxt);
- UNIT_ASSERT_EQUAL(ys.Size(), 1);
- UNIT_ASSERT_EQUAL(ys[0].Value<int>(), 20);
- }
+
+ // Mixing contexts from different documents is forbidden
+ TDocument otherXml("<root></root>", TDocument::String);
+ TXPathContextPtr otherCtxt = otherXml.Root().CreateXPathContext();
+ UNIT_ASSERT_EXCEPTION(b.Node("c", false, *otherCtxt), yexception);
+
+ // Check Nodes()
+ TConstNodes texts = xml.Root().Nodes("text", true, *rootCtxt);
+ UNIT_ASSERT_EQUAL(texts.Size(), 2);
+
+ // Nodes() does't work for non-element nodes
+ UNIT_ASSERT_EXCEPTION(xml.Root().Nodes("text/@x", true, *rootCtxt), yexception);
+
+ // Check XPath()
+ TConstNodes ys = xml.Root().XPath("text/@y", true, *rootCtxt);
+ UNIT_ASSERT_EQUAL(ys.Size(), 1);
+ UNIT_ASSERT_EQUAL(ys[0].Value<int>(), 20);
+ }
Y_UNIT_TEST(Html) {
using namespace NXml;
@@ -291,19 +291,19 @@ Y_UNIT_TEST_SUITE(TestXmlDocument) {
videoNode.SaveAsHtml(ss);
UNIT_ASSERT_EQUAL(ss.Str(), "<video controls></video>");
}
-
- Y_UNIT_TEST(Move) {
- using namespace NXml;
-
- TDocument xml1("foo", TDocument::RootName);
- xml1.Root().AddChild("bar");
-
- UNIT_ASSERT_VALUES_EQUAL(xml1.Root().ToString(), "<foo><bar/></foo>");
-
- TDocument xml2 = std::move(xml1);
- UNIT_ASSERT_EXCEPTION(xml1.Root(), yexception);
- UNIT_ASSERT_VALUES_EQUAL(xml2.Root().ToString(), "<foo><bar/></foo>");
- }
+
+ Y_UNIT_TEST(Move) {
+ using namespace NXml;
+
+ TDocument xml1("foo", TDocument::RootName);
+ xml1.Root().AddChild("bar");
+
+ UNIT_ASSERT_VALUES_EQUAL(xml1.Root().ToString(), "<foo><bar/></foo>");
+
+ TDocument xml2 = std::move(xml1);
+ UNIT_ASSERT_EXCEPTION(xml1.Root(), yexception);
+ UNIT_ASSERT_VALUES_EQUAL(xml2.Root().ToString(), "<foo><bar/></foo>");
+ }
Y_UNIT_TEST(StringConversion) {
using namespace NXml;
diff --git a/library/cpp/xml/document/xml-textreader.cpp b/library/cpp/xml/document/xml-textreader.cpp
index 291c1a0f55..b946f1fbf2 100644
--- a/library/cpp/xml/document/xml-textreader.cpp
+++ b/library/cpp/xml/document/xml-textreader.cpp
@@ -1,318 +1,318 @@
-#include "xml-textreader.h"
-
-#include <contrib/libs/libxml/include/libxml/xmlreader.h>
-
-#include <util/generic/yexception.h>
-#include <util/string/strip.h>
-#include <util/system/compiler.h>
-
-namespace NXml {
+#include "xml-textreader.h"
+
+#include <contrib/libs/libxml/include/libxml/xmlreader.h>
+
+#include <util/generic/yexception.h>
+#include <util/string/strip.h>
+#include <util/system/compiler.h>
+
+namespace NXml {
TTextReader::TTextReader(IInputStream& stream, const TOptions& options)
- : Stream(stream)
- , IsError(false)
- {
+ : Stream(stream)
+ , IsError(false)
+ {
Impl.Reset(xmlReaderForIO(ReadFromInputStreamCallback, nullptr, this, nullptr, nullptr, options.GetMask()));
-
- if (!Impl) {
- ythrow yexception() << "cannot instantiate underlying xmlTextReader structure";
- }
- SetupErrorHandler();
- CheckForExceptions();
- }
-
- TTextReader::~TTextReader() {
- }
-
- bool TTextReader::Read() {
- return BoolResult(xmlTextReaderRead(Impl.Get()));
- }
-
+
+ if (!Impl) {
+ ythrow yexception() << "cannot instantiate underlying xmlTextReader structure";
+ }
+ SetupErrorHandler();
+ CheckForExceptions();
+ }
+
+ TTextReader::~TTextReader() {
+ }
+
+ bool TTextReader::Read() {
+ return BoolResult(xmlTextReaderRead(Impl.Get()));
+ }
+
TString TTextReader::ReadInnerXml() const {
- return TempStringOrEmptyResult(xmlTextReaderReadInnerXml(Impl.Get()));
- }
-
+ return TempStringOrEmptyResult(xmlTextReaderReadInnerXml(Impl.Get()));
+ }
+
TString TTextReader::ReadOuterXml() const {
- return TempStringOrEmptyResult(xmlTextReaderReadOuterXml(Impl.Get()));
- }
-
+ return TempStringOrEmptyResult(xmlTextReaderReadOuterXml(Impl.Get()));
+ }
+
TString TTextReader::ReadString() const {
- return TempStringOrEmptyResult(xmlTextReaderReadString(Impl.Get()));
- }
-
- bool TTextReader::ReadAttributeValue() const {
- return BoolResult(xmlTextReaderReadAttributeValue(Impl.Get()));
- }
-
- int TTextReader::GetAttributeCount() const {
- return IntResult(xmlTextReaderAttributeCount(Impl.Get()));
- }
-
- TStringBuf TTextReader::GetBaseUri() const {
- return ConstStringOrEmptyResult(xmlTextReaderConstBaseUri(Impl.Get()));
- }
-
- int TTextReader::GetDepth() const {
- return IntResult(xmlTextReaderDepth(Impl.Get()));
- }
-
- bool TTextReader::HasAttributes() const {
- return BoolResult(xmlTextReaderHasAttributes(Impl.Get()));
- }
-
- bool TTextReader::HasValue() const {
- return BoolResult(xmlTextReaderHasValue(Impl.Get()));
- }
-
- bool TTextReader::IsDefault() const {
- return BoolResult(xmlTextReaderIsDefault(Impl.Get()));
- }
-
- bool TTextReader::IsEmptyElement() const {
- return BoolResult(xmlTextReaderIsEmptyElement(Impl.Get()));
- }
-
- TStringBuf TTextReader::GetLocalName() const {
- return ConstStringOrEmptyResult(xmlTextReaderConstLocalName(Impl.Get()));
- }
-
- TStringBuf TTextReader::GetName() const {
- return ConstStringOrEmptyResult(xmlTextReaderConstName(Impl.Get()));
- }
-
- TStringBuf TTextReader::GetNamespaceUri() const {
- return ConstStringOrEmptyResult(xmlTextReaderConstNamespaceUri(Impl.Get()));
- }
-
- TTextReader::ENodeType TTextReader::GetNodeType() const {
- return static_cast<ENodeType>(IntResult(xmlTextReaderNodeType(Impl.Get())));
- }
-
- TStringBuf TTextReader::GetPrefix() const {
- return ConstStringOrEmptyResult(xmlTextReaderConstPrefix(Impl.Get()));
- }
-
- char TTextReader::GetQuoteChar() const {
- return CharResult(xmlTextReaderQuoteChar(Impl.Get()));
- }
-
- TStringBuf TTextReader::GetValue() const {
- return ConstStringOrEmptyResult(xmlTextReaderConstValue(Impl.Get()));
- }
-
- TTextReader::EReadState TTextReader::GetReadState() const {
- return static_cast<EReadState>(IntResult(xmlTextReaderReadState(Impl.Get())));
- }
-
- void TTextReader::Close() {
- if (xmlTextReaderClose(Impl.Get()) == -1) {
- ThrowException();
- }
- }
-
+ return TempStringOrEmptyResult(xmlTextReaderReadString(Impl.Get()));
+ }
+
+ bool TTextReader::ReadAttributeValue() const {
+ return BoolResult(xmlTextReaderReadAttributeValue(Impl.Get()));
+ }
+
+ int TTextReader::GetAttributeCount() const {
+ return IntResult(xmlTextReaderAttributeCount(Impl.Get()));
+ }
+
+ TStringBuf TTextReader::GetBaseUri() const {
+ return ConstStringOrEmptyResult(xmlTextReaderConstBaseUri(Impl.Get()));
+ }
+
+ int TTextReader::GetDepth() const {
+ return IntResult(xmlTextReaderDepth(Impl.Get()));
+ }
+
+ bool TTextReader::HasAttributes() const {
+ return BoolResult(xmlTextReaderHasAttributes(Impl.Get()));
+ }
+
+ bool TTextReader::HasValue() const {
+ return BoolResult(xmlTextReaderHasValue(Impl.Get()));
+ }
+
+ bool TTextReader::IsDefault() const {
+ return BoolResult(xmlTextReaderIsDefault(Impl.Get()));
+ }
+
+ bool TTextReader::IsEmptyElement() const {
+ return BoolResult(xmlTextReaderIsEmptyElement(Impl.Get()));
+ }
+
+ TStringBuf TTextReader::GetLocalName() const {
+ return ConstStringOrEmptyResult(xmlTextReaderConstLocalName(Impl.Get()));
+ }
+
+ TStringBuf TTextReader::GetName() const {
+ return ConstStringOrEmptyResult(xmlTextReaderConstName(Impl.Get()));
+ }
+
+ TStringBuf TTextReader::GetNamespaceUri() const {
+ return ConstStringOrEmptyResult(xmlTextReaderConstNamespaceUri(Impl.Get()));
+ }
+
+ TTextReader::ENodeType TTextReader::GetNodeType() const {
+ return static_cast<ENodeType>(IntResult(xmlTextReaderNodeType(Impl.Get())));
+ }
+
+ TStringBuf TTextReader::GetPrefix() const {
+ return ConstStringOrEmptyResult(xmlTextReaderConstPrefix(Impl.Get()));
+ }
+
+ char TTextReader::GetQuoteChar() const {
+ return CharResult(xmlTextReaderQuoteChar(Impl.Get()));
+ }
+
+ TStringBuf TTextReader::GetValue() const {
+ return ConstStringOrEmptyResult(xmlTextReaderConstValue(Impl.Get()));
+ }
+
+ TTextReader::EReadState TTextReader::GetReadState() const {
+ return static_cast<EReadState>(IntResult(xmlTextReaderReadState(Impl.Get())));
+ }
+
+ void TTextReader::Close() {
+ if (xmlTextReaderClose(Impl.Get()) == -1) {
+ ThrowException();
+ }
+ }
+
TString TTextReader::GetAttribute(int number) const {
- return TempStringResult(xmlTextReaderGetAttributeNo(Impl.Get(), number));
- }
-
+ return TempStringResult(xmlTextReaderGetAttributeNo(Impl.Get(), number));
+ }
+
TString TTextReader::GetAttribute(TZtStringBuf name) const {
return TempStringResult(xmlTextReaderGetAttribute(Impl.Get(), XMLCHAR(name.data())));
- }
-
+ }
+
TString TTextReader::GetAttribute(TZtStringBuf localName, TZtStringBuf nsUri) const {
return TempStringResult(xmlTextReaderGetAttributeNs(Impl.Get(), XMLCHAR(localName.data()), XMLCHAR(nsUri.data())));
- }
-
+ }
+
TString TTextReader::LookupNamespace(TZtStringBuf prefix) const {
return TempStringResult(xmlTextReaderLookupNamespace(Impl.Get(), XMLCHAR(prefix.data())));
- }
-
- bool TTextReader::MoveToAttribute(int number) {
- return BoolResult(xmlTextReaderMoveToAttributeNo(Impl.Get(), number));
- }
-
+ }
+
+ bool TTextReader::MoveToAttribute(int number) {
+ return BoolResult(xmlTextReaderMoveToAttributeNo(Impl.Get(), number));
+ }
+
bool TTextReader::MoveToAttribute(TZtStringBuf name) {
return BoolResult(xmlTextReaderMoveToAttribute(Impl.Get(), XMLCHAR(name.data())));
- }
-
+ }
+
bool TTextReader::MoveToAttribute(TZtStringBuf localName, TZtStringBuf nsUri) {
return BoolResult(xmlTextReaderMoveToAttributeNs(Impl.Get(), XMLCHAR(localName.data()), XMLCHAR(nsUri.data())));
- }
-
- bool TTextReader::MoveToFirstAttribute() {
- return BoolResult(xmlTextReaderMoveToFirstAttribute(Impl.Get()));
- }
-
- bool TTextReader::MoveToNextAttribute() {
- return BoolResult(xmlTextReaderMoveToNextAttribute(Impl.Get()));
- }
-
- bool TTextReader::MoveToElement() {
- return BoolResult(xmlTextReaderMoveToElement(Impl.Get()));
- }
-
- TConstNode TTextReader::Expand() const {
- const xmlNodePtr node = xmlTextReaderExpand(Impl.Get());
- if (node == nullptr) {
- ThrowException();
- }
- return TConstNode(TNode(node->doc, node));
- }
-
- bool TTextReader::Next() {
- return BoolResult(xmlTextReaderNext(Impl.Get()));
- }
-
- bool TTextReader::IsValid() const {
- return BoolResult(xmlTextReaderIsValid(Impl.Get()));
- }
-
- // Callback for xmlReaderForIO() to read more data.
+ }
+
+ bool TTextReader::MoveToFirstAttribute() {
+ return BoolResult(xmlTextReaderMoveToFirstAttribute(Impl.Get()));
+ }
+
+ bool TTextReader::MoveToNextAttribute() {
+ return BoolResult(xmlTextReaderMoveToNextAttribute(Impl.Get()));
+ }
+
+ bool TTextReader::MoveToElement() {
+ return BoolResult(xmlTextReaderMoveToElement(Impl.Get()));
+ }
+
+ TConstNode TTextReader::Expand() const {
+ const xmlNodePtr node = xmlTextReaderExpand(Impl.Get());
+ if (node == nullptr) {
+ ThrowException();
+ }
+ return TConstNode(TNode(node->doc, node));
+ }
+
+ bool TTextReader::Next() {
+ return BoolResult(xmlTextReaderNext(Impl.Get()));
+ }
+
+ bool TTextReader::IsValid() const {
+ return BoolResult(xmlTextReaderIsValid(Impl.Get()));
+ }
+
+ // Callback for xmlReaderForIO() to read more data.
// It is almost "noexcept" (std::bad_alloc may happen when saving exception message to new TString).
- // Waiting for std::exception_ptr and std::rethrow_exception from C++11 in Arcadia to make it really "noexcept".
- int TTextReader::ReadFromInputStreamCallback(void* context, char* buffer, int len) {
+ // Waiting for std::exception_ptr and std::rethrow_exception from C++11 in Arcadia to make it really "noexcept".
+ int TTextReader::ReadFromInputStreamCallback(void* context, char* buffer, int len) {
Y_ASSERT(len >= 0);
- TTextReader* reader = static_cast<TTextReader*>(context);
-
- int result = -1;
-
+ TTextReader* reader = static_cast<TTextReader*>(context);
+
+ int result = -1;
+
// Exception may be thrown by IInputStream::Read().
- // It is caught unconditionally because exceptions cannot safely pass through libxml2 plain C code
- // (no destructors, no RAII, raw pointers, so in case of stack unwinding some memory gets leaked).
-
- try {
- result = reader->Stream.Read(buffer, len);
- } catch (const yexception& ex) {
- reader->LogError() << "read from input stream failed: " << ex;
- } catch (...) {
- reader->LogError() << "read from input stream failed";
- }
-
- return result;
- }
-
- void TTextReader::OnLibxmlError(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator) {
- TTextReader* reader = static_cast<TTextReader*>(arg);
+ // It is caught unconditionally because exceptions cannot safely pass through libxml2 plain C code
+ // (no destructors, no RAII, raw pointers, so in case of stack unwinding some memory gets leaked).
+
+ try {
+ result = reader->Stream.Read(buffer, len);
+ } catch (const yexception& ex) {
+ reader->LogError() << "read from input stream failed: " << ex;
+ } catch (...) {
+ reader->LogError() << "read from input stream failed";
+ }
+
+ return result;
+ }
+
+ void TTextReader::OnLibxmlError(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator) {
+ TTextReader* reader = static_cast<TTextReader*>(arg);
Y_ASSERT(reader != nullptr);
-
- TStringStream& out = reader->LogError();
-
- if (severity == XML_PARSER_SEVERITY_ERROR) {
- out << "libxml parse error";
- } else if (severity == XML_PARSER_SEVERITY_VALIDITY_ERROR) {
- out << "libxml validity error";
- } else {
- out << "libxml error";
- }
-
- if (locator != nullptr) {
- const int line = xmlTextReaderLocatorLineNumber(locator);
- const TCharPtr baseUri = xmlTextReaderLocatorBaseURI(locator);
- out << " (";
- if (line != -1) {
- out << "at line " << line;
- if (baseUri) {
- out << ", ";
- }
- }
- if (baseUri) {
- out << "base URI " << CAST2CHAR(baseUri.Get());
- }
- out << ")";
- }
-
- TStringBuf message = (msg != nullptr) ? msg : "unknown";
- message = StripStringRight(message); // remove trailing \n that is added by libxml
- if (!message.empty()) {
- out << ": " << message;
- }
- }
-
- void TTextReader::SetupErrorHandler() {
- xmlTextReaderErrorFunc func = nullptr;
- void* arg = nullptr;
-
- // We respect any other error handlers already set up:
- xmlTextReaderGetErrorHandler(Impl.Get(), &func, &arg);
- if (!func) {
- func = TTextReader::OnLibxmlError;
- xmlTextReaderSetErrorHandler(Impl.Get(), func, this);
- }
- }
-
- TStringStream& TTextReader::LogError() const {
- if (IsError) { // maybe there are previous errors
- ErrorBuffer << Endl;
- }
- IsError = true;
- return ErrorBuffer;
- }
-
- void TTextReader::CheckForExceptions() const {
+
+ TStringStream& out = reader->LogError();
+
+ if (severity == XML_PARSER_SEVERITY_ERROR) {
+ out << "libxml parse error";
+ } else if (severity == XML_PARSER_SEVERITY_VALIDITY_ERROR) {
+ out << "libxml validity error";
+ } else {
+ out << "libxml error";
+ }
+
+ if (locator != nullptr) {
+ const int line = xmlTextReaderLocatorLineNumber(locator);
+ const TCharPtr baseUri = xmlTextReaderLocatorBaseURI(locator);
+ out << " (";
+ if (line != -1) {
+ out << "at line " << line;
+ if (baseUri) {
+ out << ", ";
+ }
+ }
+ if (baseUri) {
+ out << "base URI " << CAST2CHAR(baseUri.Get());
+ }
+ out << ")";
+ }
+
+ TStringBuf message = (msg != nullptr) ? msg : "unknown";
+ message = StripStringRight(message); // remove trailing \n that is added by libxml
+ if (!message.empty()) {
+ out << ": " << message;
+ }
+ }
+
+ void TTextReader::SetupErrorHandler() {
+ xmlTextReaderErrorFunc func = nullptr;
+ void* arg = nullptr;
+
+ // We respect any other error handlers already set up:
+ xmlTextReaderGetErrorHandler(Impl.Get(), &func, &arg);
+ if (!func) {
+ func = TTextReader::OnLibxmlError;
+ xmlTextReaderSetErrorHandler(Impl.Get(), func, this);
+ }
+ }
+
+ TStringStream& TTextReader::LogError() const {
+ if (IsError) { // maybe there are previous errors
+ ErrorBuffer << Endl;
+ }
+ IsError = true;
+ return ErrorBuffer;
+ }
+
+ void TTextReader::CheckForExceptions() const {
if (Y_LIKELY(!IsError)) {
- return;
- }
-
+ return;
+ }
+
const TString message = ErrorBuffer.Str();
- ErrorBuffer.clear();
- IsError = false;
-
- ythrow yexception() << message;
- }
-
- void TTextReader::ThrowException() const {
- CheckForExceptions();
- // Probably CheckForExceptions() would throw an exception with more verbose message. As the last resort
- // (we do not even know the name of the failed libxml function, but it's possible to deduce it from stacktrace):
- ythrow yexception() << "libxml function returned error exit code";
- }
-
- bool TTextReader::BoolResult(int value) const {
+ ErrorBuffer.clear();
+ IsError = false;
+
+ ythrow yexception() << message;
+ }
+
+ void TTextReader::ThrowException() const {
+ CheckForExceptions();
+ // Normally CheckForExceptions() has already thrown an exception with a more detailed message. As a last resort
+ // (we do not even know the name of the failed libxml function, but it can be deduced from the stack trace):
+ ythrow yexception() << "libxml function returned error exit code";
+ }
+
+ bool TTextReader::BoolResult(int value) const {
if (Y_UNLIKELY(value == -1)) {
- ThrowException();
- }
- return (value != 0);
- }
-
- int TTextReader::IntResult(int value) const {
+ ThrowException();
+ }
+ return (value != 0);
+ }
+
+ int TTextReader::IntResult(int value) const {
if (Y_UNLIKELY(value == -1)) {
- ThrowException();
- }
- return value;
- }
-
- char TTextReader::CharResult(int value) const {
+ ThrowException();
+ }
+ return value;
+ }
+
+ char TTextReader::CharResult(int value) const {
if (Y_UNLIKELY(value == -1)) {
- ThrowException();
- }
- return static_cast<char>(value);
- }
-
- TStringBuf TTextReader::ConstStringResult(const xmlChar* value) const {
+ ThrowException();
+ }
+ return static_cast<char>(value);
+ }
+
+ TStringBuf TTextReader::ConstStringResult(const xmlChar* value) const {
if (Y_UNLIKELY(value == nullptr)) {
- ThrowException();
- }
- return CAST2CHAR(value);
- }
-
- TStringBuf TTextReader::ConstStringOrEmptyResult(const xmlChar* value) const {
- CheckForExceptions();
- return (value != nullptr) ? TStringBuf(CAST2CHAR(value)) : TStringBuf();
- }
-
+ ThrowException();
+ }
+ return CAST2CHAR(value);
+ }
+
+ TStringBuf TTextReader::ConstStringOrEmptyResult(const xmlChar* value) const {
+ CheckForExceptions();
+ return (value != nullptr) ? TStringBuf(CAST2CHAR(value)) : TStringBuf();
+ }
+
TString TTextReader::TempStringResult(TCharPtr value) const {
if (Y_UNLIKELY(value == nullptr)) {
- ThrowException();
- }
+ ThrowException();
+ }
return TString(CAST2CHAR(value.Get()));
- }
-
+ }
+
TString TTextReader::TempStringOrEmptyResult(TCharPtr value) const {
- CheckForExceptions();
+ CheckForExceptions();
return (value != nullptr) ? TString(CAST2CHAR(value.Get())) : TString();
- }
-
- struct TTextReader::TDeleter {
- static inline void Destroy(xmlTextReaderPtr handle) {
- xmlFreeTextReader(handle);
- }
- };
+ }
+
+ struct TTextReader::TDeleter {
+ static inline void Destroy(xmlTextReaderPtr handle) {
+ xmlFreeTextReader(handle);
+ }
+ };
}
diff --git a/library/cpp/xml/document/xml-textreader.h b/library/cpp/xml/document/xml-textreader.h
index 9e0b8be6ea..ab4c329d26 100644
--- a/library/cpp/xml/document/xml-textreader.h
+++ b/library/cpp/xml/document/xml-textreader.h
@@ -1,325 +1,325 @@
-#pragma once
-
-#include "xml-document.h"
+#pragma once
+
+#include "xml-document.h"
#include "xml-options.h"
-
-#include <contrib/libs/libxml/include/libxml/xmlreader.h>
-
+
+#include <contrib/libs/libxml/include/libxml/xmlreader.h>
+
#include <library/cpp/string_utils/ztstrbuf/ztstrbuf.h>
-#include <util/generic/noncopyable.h>
-#include <util/generic/ptr.h>
-#include <util/generic/strbuf.h>
+#include <util/generic/noncopyable.h>
+#include <util/generic/ptr.h>
+#include <util/generic/strbuf.h>
#include <util/generic/string.h>
#include <functional>
-#include <util/stream/input.h>
-#include <util/stream/str.h>
-
-namespace NXml {
- /**
- * TextReader Parser
- *
- * API of the XML streaming API based on C# interfaces.
- * Provides fast, non-cached, forward-only access to XML data.
- *
- * Like the SAX parser, the TextReader parser is suitable for sequential
- * parsing, but instead of implementing handlers for specific parts of the
- * document, it allows you to detect the current node type, process the node
- * accordingly, and skip forward in the document as much as necessary.
- *
- * Unlike the DOM parser, you may not move backwards in the XML document.
- * And unlike the SAX parser, you must not waste time processing nodes that do not
- * interest you.
- *
- * All methods are on the single parser instance, but their result depends on the current context.
- * For instance, use Read() to move to the next node, and MoveToElement() to navigate to child nodes.
- * These methods will return false when no more nodes are available. Then use
- * methods such as GetName() and GetValue() to examine the elements and their attributes.
- *
- * This wrapper is inspired by TextReader from libxml++.
- */
-
- class TTextReader: private TNonCopyable {
- public:
- // strongly-typed alias for enum from xmlreader.h
- enum class ENodeType : int {
- // clang-format off
- Attribute = XML_READER_TYPE_ATTRIBUTE,
- CDATA = XML_READER_TYPE_CDATA,
- Comment = XML_READER_TYPE_COMMENT,
- Document = XML_READER_TYPE_DOCUMENT,
- DocumentFragment = XML_READER_TYPE_DOCUMENT_FRAGMENT,
- DocumentType = XML_READER_TYPE_DOCUMENT_TYPE,
- Element = XML_READER_TYPE_ELEMENT,
- EndElement = XML_READER_TYPE_END_ELEMENT,
- EndEntity = XML_READER_TYPE_END_ENTITY,
- Entity = XML_READER_TYPE_ENTITY,
- EntityReference = XML_READER_TYPE_ENTITY_REFERENCE,
- None = XML_READER_TYPE_NONE,
- Notation = XML_READER_TYPE_NOTATION,
- ProcessingInstruction = XML_READER_TYPE_PROCESSING_INSTRUCTION,
- SignificantWhitespace = XML_READER_TYPE_SIGNIFICANT_WHITESPACE,
- Text = XML_READER_TYPE_TEXT,
- Whitespace = XML_READER_TYPE_WHITESPACE,
- XmlDeclaration = XML_READER_TYPE_XML_DECLARATION,
- // clang-format on
- };
-
- enum class EReadState : int {
- // clang-format off
- Closed = XML_TEXTREADER_MODE_CLOSED,
- EndOfFile = XML_TEXTREADER_MODE_EOF,
- Error = XML_TEXTREADER_MODE_ERROR,
- Initial = XML_TEXTREADER_MODE_INITIAL,
- Interactive = XML_TEXTREADER_MODE_INTERACTIVE,
- Reading = XML_TEXTREADER_MODE_READING,
- // clang-format on
- };
-
- public:
+#include <util/stream/input.h>
+#include <util/stream/str.h>
+
+namespace NXml {
+ /**
+ * TextReader Parser
+ *
+ * Wrapper for the libxml2 XML streaming API (xmlTextReader), which is based on C# interfaces.
+ * Provides fast, non-cached, forward-only access to XML data.
+ *
+ * Like the SAX parser, the TextReader parser is suitable for sequential
+ * parsing, but instead of implementing handlers for specific parts of the
+ * document, it allows you to detect the current node type, process the node
+ * accordingly, and skip forward in the document as much as necessary.
+ *
+ * Unlike the DOM parser, you may not move backwards in the XML document.
+ * And unlike the SAX parser, you do not have to waste time processing nodes that do not
+ * interest you.
+ *
+ * All methods are on the single parser instance, but their result depends on the current context.
+ * For instance, use Read() to move to the next node, and MoveToFirstAttribute() / MoveToNextAttribute() / MoveToElement()
+ * to navigate between an element and its attributes.
+ * These methods will return false when no more nodes are available. Then use
+ * methods such as GetName() and GetValue() to examine the elements and their attributes.
+ *
+ * This wrapper is inspired by TextReader from libxml++.
+ */
+
+ class TTextReader: private TNonCopyable {
+ public:
+ // strongly-typed alias for enum from xmlreader.h
+ enum class ENodeType : int {
+ // clang-format off
+ Attribute = XML_READER_TYPE_ATTRIBUTE,
+ CDATA = XML_READER_TYPE_CDATA,
+ Comment = XML_READER_TYPE_COMMENT,
+ Document = XML_READER_TYPE_DOCUMENT,
+ DocumentFragment = XML_READER_TYPE_DOCUMENT_FRAGMENT,
+ DocumentType = XML_READER_TYPE_DOCUMENT_TYPE,
+ Element = XML_READER_TYPE_ELEMENT,
+ EndElement = XML_READER_TYPE_END_ELEMENT,
+ EndEntity = XML_READER_TYPE_END_ENTITY,
+ Entity = XML_READER_TYPE_ENTITY,
+ EntityReference = XML_READER_TYPE_ENTITY_REFERENCE,
+ None = XML_READER_TYPE_NONE,
+ Notation = XML_READER_TYPE_NOTATION,
+ ProcessingInstruction = XML_READER_TYPE_PROCESSING_INSTRUCTION,
+ SignificantWhitespace = XML_READER_TYPE_SIGNIFICANT_WHITESPACE,
+ Text = XML_READER_TYPE_TEXT,
+ Whitespace = XML_READER_TYPE_WHITESPACE,
+ XmlDeclaration = XML_READER_TYPE_XML_DECLARATION,
+ // clang-format on
+ };
+
+ enum class EReadState : int {
+ // clang-format off
+ Closed = XML_TEXTREADER_MODE_CLOSED,
+ EndOfFile = XML_TEXTREADER_MODE_EOF,
+ Error = XML_TEXTREADER_MODE_ERROR,
+ Initial = XML_TEXTREADER_MODE_INITIAL,
+ Interactive = XML_TEXTREADER_MODE_INTERACTIVE,
+ Reading = XML_TEXTREADER_MODE_READING,
+ // clang-format on
+ };
+
+ public:
TTextReader(IInputStream& stream, const TOptions& options = TOptions());
- ~TTextReader();
-
- /**
- * Moves the position of the current instance to the next node in the stream, exposing its properties.
- * @return true if the node was read successfully, false if there are no more nodes to read
- */
- bool Read();
-
- /**
- * Reads the contents of the current node, including child nodes and markup.
- * @return A string containing the XML content, or an empty string
- * if the current node is neither an element nor attribute, or has no child nodes
- */
+ ~TTextReader();
+
+ /**
+ * Moves the position of the current instance to the next node in the stream, exposing its properties.
+ * @return true if the node was read successfully, false if there are no more nodes to read
+ */
+ bool Read();
+
+ /**
+ * Reads the contents of the current node, including child nodes and markup.
+ * @return A string containing the XML content, or an empty string
+ * if the current node is neither an element nor attribute, or has no child nodes
+ */
TString ReadInnerXml() const;
-
- /**
- * Reads the current node and its contents, including child nodes and markup.
- * @return A string containing the XML content, or an empty string
- * if the current node is neither an element nor attribute
- */
+
+ /**
+ * Reads the current node and its contents, including child nodes and markup.
+ * @return A string containing the XML content, or an empty string
+ * if the current node is neither an element nor attribute
+ */
TString ReadOuterXml() const;
-
- /**
- * Reads the contents of an element or a text node as a string.
- * @return A string containing the contents of the Element or Text node,
- * or an empty string if the reader is positioned on any other type of node
- */
+
+ /**
+ * Reads the contents of an element or a text node as a string.
+ * @return A string containing the contents of the Element or Text node,
+ * or an empty string if the reader is positioned on any other type of node
+ */
TString ReadString() const;
-
- /**
- * Parses an attribute value into one or more Text and EntityReference nodes.
- * @return A bool where true indicates the attribute value was parsed,
- * and false indicates the reader was not positioned on an attribute node
- * or all the attribute values have been read
- */
- bool ReadAttributeValue() const;
-
- /**
- * Gets the number of attributes on the current node.
- * @return The number of attributes on the current node, or zero if the current node
- * does not support attributes
- */
- int GetAttributeCount() const;
-
- /**
- * Gets the base Uniform Resource Identifier (URI) of the current node.
- * @return The base URI of the current node or an empty string if not available
- */
- TStringBuf GetBaseUri() const;
-
- /**
- * Gets the depth of the current node in the XML document.
- * @return The depth of the current node in the XML document
- */
- int GetDepth() const;
-
- /**
- * Gets a value indicating whether the current node has any attributes.
- * @return true if the current has attributes, false otherwise
- */
- bool HasAttributes() const;
-
- /**
- * Whether the node can have a text value.
- * @return true if the current node can have an associated text value, false otherwise
- */
- bool HasValue() const;
-
- /**
- * Whether an Attribute node was generated from the default value defined in the DTD or schema.
- * @return true if defaulted, false otherwise
- */
- bool IsDefault() const;
-
- /**
- * Check if the current node is empty.
- * @return true if empty, false otherwise
- */
- bool IsEmptyElement() const;
-
- /**
- * The local name of the node.
- * @return the local name or empty string if not available
- */
- TStringBuf GetLocalName() const;
-
- /**
- * The qualified name of the node, equal to Prefix:LocalName.
- * @return the name or empty string if not available
- */
- TStringBuf GetName() const;
-
- /**
- * The URI defining the namespace associated with the node.
- * @return the namespace URI or empty string if not available
- */
- TStringBuf GetNamespaceUri() const;
-
- /**
- * Get the node type of the current node.
- * @return the ENodeType of the current node
- */
- ENodeType GetNodeType() const;
-
- /**
- * Get the namespace prefix associated with the current node.
- * @return the namespace prefix, or an empty string if not available
- */
- TStringBuf GetPrefix() const;
-
- /**
- * Get the quotation mark character used to enclose the value of an attribute.
- * @return " or '
- */
- char GetQuoteChar() const;
-
- /**
- * Provides the text value of the node if present.
- * @return the string or empty if not available
- */
- TStringBuf GetValue() const;
-
- /**
- * Gets the read state of the reader.
- * @return the state value
- */
- EReadState GetReadState() const;
-
- /**
- * This method releases any resources allocated by the current instance
- * changes the state to Closed and close any underlying input.
- */
- void Close();
-
- /**
- * Provides the value of the attribute with the specified index relative to the containing element.
- * @param number the zero-based index of the attribute relative to the containing element
- */
+
+ /**
+ * Parses an attribute value into one or more Text and EntityReference nodes.
+ * @return A bool where true indicates the attribute value was parsed,
+ * and false indicates the reader was not positioned on an attribute node
+ * or all the attribute values have been read
+ */
+ bool ReadAttributeValue() const;
+
+ /**
+ * Gets the number of attributes on the current node.
+ * @return The number of attributes on the current node, or zero if the current node
+ * does not support attributes
+ */
+ int GetAttributeCount() const;
+
+ /**
+ * Gets the base Uniform Resource Identifier (URI) of the current node.
+ * @return The base URI of the current node or an empty string if not available
+ */
+ TStringBuf GetBaseUri() const;
+
+ /**
+ * Gets the depth of the current node in the XML document.
+ * @return The depth of the current node in the XML document
+ */
+ int GetDepth() const;
+
+ /**
+ * Gets a value indicating whether the current node has any attributes.
+ * @return true if the current node has attributes, false otherwise
+ */
+ bool HasAttributes() const;
+
+ /**
+ * Whether the node can have a text value.
+ * @return true if the current node can have an associated text value, false otherwise
+ */
+ bool HasValue() const;
+
+ /**
+ * Whether an Attribute node was generated from the default value defined in the DTD or schema.
+ * @return true if defaulted, false otherwise
+ */
+ bool IsDefault() const;
+
+ /**
+ * Check if the current node is empty.
+ * @return true if empty, false otherwise
+ */
+ bool IsEmptyElement() const;
+
+ /**
+ * The local name of the node.
+ * @return the local name or empty string if not available
+ */
+ TStringBuf GetLocalName() const;
+
+ /**
+ * The qualified name of the node, equal to Prefix:LocalName.
+ * @return the name or empty string if not available
+ */
+ TStringBuf GetName() const;
+
+ /**
+ * The URI defining the namespace associated with the node.
+ * @return the namespace URI or empty string if not available
+ */
+ TStringBuf GetNamespaceUri() const;
+
+ /**
+ * Get the node type of the current node.
+ * @return the ENodeType of the current node
+ */
+ ENodeType GetNodeType() const;
+
+ /**
+ * Get the namespace prefix associated with the current node.
+ * @return the namespace prefix, or an empty string if not available
+ */
+ TStringBuf GetPrefix() const;
+
+ /**
+ * Get the quotation mark character used to enclose the value of an attribute.
+ * @return " or '
+ */
+ char GetQuoteChar() const;
+
+ /**
+ * Provides the text value of the node if present.
+ * @return the string or empty if not available
+ */
+ TStringBuf GetValue() const;
+
+ /**
+ * Gets the read state of the reader.
+ * @return the state value
+ */
+ EReadState GetReadState() const;
+
+ /**
+ * This method releases any resources allocated by the current instance,
+ * changes the state to Closed and closes any underlying input.
+ */
+ void Close();
+
+ /**
+ * Provides the value of the attribute with the specified index relative to the containing element.
+ * @param number the zero-based index of the attribute relative to the containing element
+ */
TString GetAttribute(int number) const;
-
- /**
- * Provides the value of the attribute with the specified qualified name.
- * @param name the qualified name of the attribute
- */
+
+ /**
+ * Provides the value of the attribute with the specified qualified name.
+ * @param name the qualified name of the attribute
+ */
TString GetAttribute(TZtStringBuf name) const;
-
- /**
- * Provides the value of the specified attribute.
- * @param localName the local name of the attribute
- * @param nsUri the namespace URI of the attribute
- */
+
+ /**
+ * Provides the value of the specified attribute.
+ * @param localName the local name of the attribute
+ * @param nsUri the namespace URI of the attribute
+ */
TString GetAttribute(TZtStringBuf localName, TZtStringBuf nsUri) const;
-
- /**
- * Resolves a namespace prefix in the scope of the current element.
- * @param prefix the prefix whose namespace URI is to be resolved. To return the default namespace, specify empty string.
- * @return a string containing the namespace URI to which the prefix maps.
- */
+
+ /**
+ * Resolves a namespace prefix in the scope of the current element.
+ * @param prefix the prefix whose namespace URI is to be resolved. To return the default namespace, specify empty string.
+ * @return a string containing the namespace URI to which the prefix maps.
+ */
TString LookupNamespace(TZtStringBuf prefix) const;
-
- /**
- * Moves the position of the current instance to the attribute with the specified index relative to the containing element.
- * @param number the zero-based index of the attribute relative to the containing element
- * @return true in case of success, false if not found
- */
- bool MoveToAttribute(int number);
-
- /**
- * Moves the position of the current instance to the attribute with the specified qualified name.
- * @param name the qualified name of the attribute
- * @return true in case of success, false if not found
- */
+
+ /**
+ * Moves the position of the current instance to the attribute with the specified index relative to the containing element.
+ * @param number the zero-based index of the attribute relative to the containing element
+ * @return true in case of success, false if not found
+ */
+ bool MoveToAttribute(int number);
+
+ /**
+ * Moves the position of the current instance to the attribute with the specified qualified name.
+ * @param name the qualified name of the attribute
+ * @return true in case of success, false if not found
+ */
bool MoveToAttribute(TZtStringBuf name);
-
- /**
- * Moves the position of the current instance to the attribute with the specified local name and namespace URI.
- * @param localName the local name of the attribute
- * @param nsUri the namespace URI of the attribute
- * @return true in case of success, false if not found
- */
+
+ /**
+ * Moves the position of the current instance to the attribute with the specified local name and namespace URI.
+ * @param localName the local name of the attribute
+ * @param nsUri the namespace URI of the attribute
+ * @return true in case of success, false if not found
+ */
bool MoveToAttribute(TZtStringBuf localName, TZtStringBuf nsUri);
-
- /**
- * Moves the position of the current instance to the first attribute associated with the current node.
- * @return true in case of success, false if not found
- */
- bool MoveToFirstAttribute();
-
- /**
- * Moves the position of the current instance to the next attribute associated with the current node.
- * @return true in case of success, false if not found
- */
- bool MoveToNextAttribute();
-
- /**
- * Moves the position of the current instance to the node that contains the current Attribute node.
- * @return true in case of success, false if not found
- */
- bool MoveToElement();
-
- /**
- * Reads the contents of the current node and the full subtree. It then makes the subtree available until the next Read() call.
- */
- TConstNode Expand() const;
-
- /**
- * Skip to the node following the current one in document order while avoiding the subtree if any.
- * @return true if the node was read successfully, false if there is no more nodes to read
- */
- bool Next();
-
- /**
- * Retrieve the validity status from the parser context.
- */
- bool IsValid() const;
-
- private:
- static int ReadFromInputStreamCallback(void* context, char* buffer, int len);
- static void OnLibxmlError(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator);
-
- void SetupErrorHandler();
- TStringStream& LogError() const;
- void CheckForExceptions() const;
- void ThrowException() const;
-
- // helpers that check return codes of C functions from libxml
- bool BoolResult(int value) const;
- int IntResult(int value) const;
- char CharResult(int value) const;
- TStringBuf ConstStringResult(const xmlChar* value) const;
- TStringBuf ConstStringOrEmptyResult(const xmlChar* value) const;
+
+ /**
+ * Moves the position of the current instance to the first attribute associated with the current node.
+ * @return true in case of success, false if not found
+ */
+ bool MoveToFirstAttribute();
+
+ /**
+ * Moves the position of the current instance to the next attribute associated with the current node.
+ * @return true in case of success, false if not found
+ */
+ bool MoveToNextAttribute();
+
+ /**
+ * Moves the position of the current instance to the node that contains the current Attribute node.
+ * @return true in case of success, false if not found
+ */
+ bool MoveToElement();
+
+ /**
+ * Reads the contents of the current node and the full subtree. It then makes the subtree available until the next Read() call.
+ */
+ TConstNode Expand() const;
+
+ /**
+ * Skip to the node following the current one in document order while avoiding the subtree if any.
+ * @return true if the node was read successfully, false if there are no more nodes to read
+ */
+ bool Next();
+
+ /**
+ * Retrieve the validity status from the parser context.
+ */
+ bool IsValid() const;
+
+ private:
+ static int ReadFromInputStreamCallback(void* context, char* buffer, int len);
+ static void OnLibxmlError(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator);
+
+ void SetupErrorHandler();
+ TStringStream& LogError() const;
+ void CheckForExceptions() const;
+ void ThrowException() const;
+
+ // helpers that check return codes of C functions from libxml
+ bool BoolResult(int value) const;
+ int IntResult(int value) const;
+ char CharResult(int value) const;
+ TStringBuf ConstStringResult(const xmlChar* value) const;
+ TStringBuf ConstStringOrEmptyResult(const xmlChar* value) const;
TString TempStringResult(TCharPtr value) const;
TString TempStringOrEmptyResult(TCharPtr value) const;
-
- private:
+
+ private:
IInputStream& Stream;
-
- mutable bool IsError;
- mutable TStringStream ErrorBuffer;
-
- struct TDeleter;
- THolder<xmlTextReader, TDeleter> Impl;
- };
-
+
+ mutable bool IsError;
+ mutable TStringStream ErrorBuffer;
+
+ struct TDeleter;
+ THolder<xmlTextReader, TDeleter> Impl;
+ };
+
}
diff --git a/library/cpp/xml/document/xml-textreader_ut.cpp b/library/cpp/xml/document/xml-textreader_ut.cpp
index 9f54523fef..6232dfe47e 100644
--- a/library/cpp/xml/document/xml-textreader_ut.cpp
+++ b/library/cpp/xml/document/xml-textreader_ut.cpp
@@ -1,34 +1,34 @@
-#include "xml-textreader.h"
-
+#include "xml-textreader.h"
+
#include <library/cpp/testing/unittest/registar.h>
-
-#include <util/generic/hash.h>
-#include <util/generic/vector.h>
-#include <util/string/join.h>
-
-namespace {
- /**
- * Simple wrapper around the xmlTextReader wrapper
- */
+
+#include <util/generic/hash.h>
+#include <util/generic/vector.h>
+#include <util/string/join.h>
+
+namespace {
+ /**
+ * Simple wrapper around the xmlTextReader wrapper
+ */
void ParseXml(const TString& xmlData,
std::function<void(NXml::TConstNode)> nodeHandlerFunc,
const TString& localName,
const TString& namespaceUri = TString()) {
- TStringInput in(xmlData);
- NXml::TTextReader reader(in);
-
- while (reader.Read()) {
- if (reader.GetNodeType() == NXml::TTextReader::ENodeType::Element &&
- reader.GetLocalName() == localName &&
+ TStringInput in(xmlData);
+ NXml::TTextReader reader(in);
+
+ while (reader.Read()) {
+ if (reader.GetNodeType() == NXml::TTextReader::ENodeType::Element &&
+ reader.GetLocalName() == localName &&
reader.GetNamespaceUri() == namespaceUri)
{
- const NXml::TConstNode node = reader.Expand();
- nodeHandlerFunc(node);
- }
- }
- }
+ const NXml::TConstNode node = reader.Expand();
+ nodeHandlerFunc(node);
+ }
+ }
+ }
}
-
+
Y_UNIT_TEST_SUITE(TestXmlTextReader) {
Y_UNIT_TEST(BasicExample) {
const TString xml = "<?xml version=\"1.0\"?>\n"
@@ -40,73 +40,73 @@ Y_UNIT_TEST_SUITE(TestXmlTextReader) {
" <child_of_child>Some content : -)</child_of_child>\n"
" </examplechild>\n"
"</example>\n";
-
- TStringInput input(xml);
- NXml::TTextReader reader(input);
-
- using ENT = NXml::TTextReader::ENodeType;
-
- struct TItem {
- int Depth;
- ENT Type;
+
+ TStringInput input(xml);
+ NXml::TTextReader reader(input);
+
+ using ENT = NXml::TTextReader::ENodeType;
+
+ struct TItem {
+ int Depth;
+ ENT Type;
TString Name;
TString Attrs;
TString Value;
- };
-
+ };
+
TVector<TItem> found;
TVector<TString> msgs;
-
- while (reader.Read()) {
- // dump attributes as "k1: v1, k2: v2, ..."
+
+ while (reader.Read()) {
+ // dump attributes as "k1: v1, k2: v2, ..."
TVector<TString> kv;
- if (reader.HasAttributes()) {
- reader.MoveToFirstAttribute();
- do {
+ if (reader.HasAttributes()) {
+ reader.MoveToFirstAttribute();
+ do {
kv.push_back(TString::Join(reader.GetName(), ": ", reader.GetValue()));
- } while (reader.MoveToNextAttribute());
- reader.MoveToElement();
- }
-
- found.push_back(TItem{
- reader.GetDepth(),
- reader.GetNodeType(),
+ } while (reader.MoveToNextAttribute());
+ reader.MoveToElement();
+ }
+
+ found.push_back(TItem{
+ reader.GetDepth(),
+ reader.GetNodeType(),
TString(reader.GetName()),
- JoinSeq(", ", kv),
+ JoinSeq(", ", kv),
reader.HasValue() ? TString(reader.GetValue()) : TString(),
- });
- }
-
+ });
+ }
+
const TVector<TItem> expected = {
- TItem{0, ENT::Element, "example", "toto: 1", ""},
- TItem{1, ENT::SignificantWhitespace, "#text", "", "\n "},
- TItem{1, ENT::Element, "examplechild", "id: 1", ""},
- TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
- TItem{2, ENT::Element, "child_of_child", "", ""},
- TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
- TItem{1, ENT::EndElement, "examplechild", "id: 1", ""},
- TItem{1, ENT::SignificantWhitespace, "#text", "", "\n "},
- TItem{1, ENT::Element, "examplechild", "id: 2, toto: 3", ""},
- TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
- TItem{2, ENT::Element, "child_of_child", "", ""},
- TItem{3, ENT::Text, "#text", "", "Some content : -)"},
- TItem{2, ENT::EndElement, "child_of_child", "", ""},
- TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
- TItem{1, ENT::EndElement, "examplechild", "id: 2, toto: 3", ""},
- TItem{1, ENT::SignificantWhitespace, "#text", "", "\n"},
+ TItem{0, ENT::Element, "example", "toto: 1", ""},
+ TItem{1, ENT::SignificantWhitespace, "#text", "", "\n "},
+ TItem{1, ENT::Element, "examplechild", "id: 1", ""},
+ TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
+ TItem{2, ENT::Element, "child_of_child", "", ""},
+ TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
+ TItem{1, ENT::EndElement, "examplechild", "id: 1", ""},
+ TItem{1, ENT::SignificantWhitespace, "#text", "", "\n "},
+ TItem{1, ENT::Element, "examplechild", "id: 2, toto: 3", ""},
+ TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
+ TItem{2, ENT::Element, "child_of_child", "", ""},
+ TItem{3, ENT::Text, "#text", "", "Some content : -)"},
+ TItem{2, ENT::EndElement, "child_of_child", "", ""},
+ TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
+ TItem{1, ENT::EndElement, "examplechild", "id: 2, toto: 3", ""},
+ TItem{1, ENT::SignificantWhitespace, "#text", "", "\n"},
TItem{0, ENT::EndElement, "example", "toto: 1", ""}};
-
- UNIT_ASSERT_VALUES_EQUAL(found.size(), expected.size());
-
- for (size_t i = 0; i < expected.size(); ++i) {
- UNIT_ASSERT_VALUES_EQUAL_C(found[i].Depth, expected[i].Depth, "line " << i);
- UNIT_ASSERT_EQUAL_C(found[i].Type, expected[i].Type, "line " << i);
- UNIT_ASSERT_VALUES_EQUAL_C(found[i].Name, expected[i].Name, "line " << i);
- UNIT_ASSERT_VALUES_EQUAL_C(found[i].Attrs, expected[i].Attrs, "line " << i);
- UNIT_ASSERT_VALUES_EQUAL_C(found[i].Value, expected[i].Value, "line " << i);
- }
- }
-
+
+ UNIT_ASSERT_VALUES_EQUAL(found.size(), expected.size());
+
+ for (size_t i = 0; i < expected.size(); ++i) {
+ UNIT_ASSERT_VALUES_EQUAL_C(found[i].Depth, expected[i].Depth, "line " << i);
+ UNIT_ASSERT_EQUAL_C(found[i].Type, expected[i].Type, "line " << i);
+ UNIT_ASSERT_VALUES_EQUAL_C(found[i].Name, expected[i].Name, "line " << i);
+ UNIT_ASSERT_VALUES_EQUAL_C(found[i].Attrs, expected[i].Attrs, "line " << i);
+ UNIT_ASSERT_VALUES_EQUAL_C(found[i].Value, expected[i].Value, "line " << i);
+ }
+ }
+
const TString GEODATA = "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
"<root>"
""
@@ -133,158 +133,158 @@ Y_UNIT_TEST_SUITE(TestXmlTextReader) {
" </country>"
""
"</root>";
-
+
Y_UNIT_TEST(ParseXmlSimple) {
- struct TCountry {
+ struct TCountry {
TString Name;
TVector<TString> Cities;
- };
-
+ };
+
THashMap<int, TCountry> data;
-
- auto handler = [&data](NXml::TConstNode node) {
- const int id = node.Attr<int>("id");
-
- TCountry& c = data[id];
-
+
+ auto handler = [&data](NXml::TConstNode node) {
+ const int id = node.Attr<int>("id");
+
+ TCountry& c = data[id];
+
c.Name = node.FirstChild("name").Value<TString>();
-
- const NXml::TConstNodes cityNodes = node.Nodes("cities/city");
+
+ const NXml::TConstNodes cityNodes = node.Nodes("cities/city");
for (auto cityNode : cityNodes) {
c.Cities.push_back(cityNode.Value<TString>());
- }
- };
-
- ParseXml(GEODATA, handler, "country");
-
- UNIT_ASSERT_EQUAL(data.size(), 3);
-
+ }
+ };
+
+ ParseXml(GEODATA, handler, "country");
+
+ UNIT_ASSERT_EQUAL(data.size(), 3);
+
UNIT_ASSERT(data.contains(225));
- const TCountry& russia = data.at(225);
- UNIT_ASSERT_EQUAL(russia.Name, "Россия");
- UNIT_ASSERT_EQUAL(russia.Cities.size(), 2);
- UNIT_ASSERT_EQUAL(russia.Cities[0], "Москва");
- UNIT_ASSERT_EQUAL(russia.Cities[1], "Санкт-Петербург");
-
+ const TCountry& russia = data.at(225);
+ UNIT_ASSERT_EQUAL(russia.Name, "Россия");
+ UNIT_ASSERT_EQUAL(russia.Cities.size(), 2);
+ UNIT_ASSERT_EQUAL(russia.Cities[0], "Москва");
+ UNIT_ASSERT_EQUAL(russia.Cities[1], "Санкт-Петербург");
+
UNIT_ASSERT(data.contains(149));
- const TCountry& belarus = data.at(149);
- UNIT_ASSERT_EQUAL(belarus.Name, "Беларусь");
- UNIT_ASSERT_EQUAL(belarus.Cities.size(), 1);
- UNIT_ASSERT_EQUAL(belarus.Cities[0], "Минск");
-
+ const TCountry& belarus = data.at(149);
+ UNIT_ASSERT_EQUAL(belarus.Name, "Беларусь");
+ UNIT_ASSERT_EQUAL(belarus.Cities.size(), 1);
+ UNIT_ASSERT_EQUAL(belarus.Cities[0], "Минск");
+
UNIT_ASSERT(data.contains(187));
- const TCountry& ukraine = data.at(187);
- UNIT_ASSERT_EQUAL(ukraine.Name, "Украина");
- UNIT_ASSERT_EQUAL(ukraine.Cities.size(), 1);
- UNIT_ASSERT_EQUAL(ukraine.Cities[0], "Киев");
- }
-
+ const TCountry& ukraine = data.at(187);
+ UNIT_ASSERT_EQUAL(ukraine.Name, "Украина");
+ UNIT_ASSERT_EQUAL(ukraine.Cities.size(), 1);
+ UNIT_ASSERT_EQUAL(ukraine.Cities[0], "Киев");
+ }
+
Y_UNIT_TEST(ParseXmlDeepLevel) {
TVector<TString> cities;
-
- auto handler = [&cities](NXml::TConstNode node) {
+
+ auto handler = [&cities](NXml::TConstNode node) {
cities.push_back(node.Value<TString>());
- };
-
- ParseXml(GEODATA, handler, "city");
-
- UNIT_ASSERT_EQUAL(cities.size(), 4);
- UNIT_ASSERT_EQUAL(cities[0], "Москва");
- UNIT_ASSERT_EQUAL(cities[1], "Санкт-Петербург");
- UNIT_ASSERT_EQUAL(cities[2], "Минск");
- UNIT_ASSERT_EQUAL(cities[3], "Киев");
- }
-
+ };
+
+ ParseXml(GEODATA, handler, "city");
+
+ UNIT_ASSERT_EQUAL(cities.size(), 4);
+ UNIT_ASSERT_EQUAL(cities[0], "Москва");
+ UNIT_ASSERT_EQUAL(cities[1], "Санкт-Петербург");
+ UNIT_ASSERT_EQUAL(cities[2], "Минск");
+ UNIT_ASSERT_EQUAL(cities[3], "Киев");
+ }
+
Y_UNIT_TEST(ParseXmlException) {
- // Check that exception properly passes through plain C code of libxml,
- // no leaks are detected by valgrind.
- auto handler = [](NXml::TConstNode node) {
- const int id = node.Attr<int>("id");
- if (id != 225) {
- ythrow yexception() << "unsupported id: " << id;
- }
- };
-
- UNIT_ASSERT_EXCEPTION(ParseXml(GEODATA, handler, "country"), yexception);
- UNIT_ASSERT_EXCEPTION(ParseXml("<a></b>", handler, "a"), yexception);
- UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2\"></b></root>", handler, "a"), yexception);
- UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2></a></root>", handler, "a"), yexception);
- }
-
+ // Check that exception properly passes through plain C code of libxml,
+ // no leaks are detected by valgrind.
+ auto handler = [](NXml::TConstNode node) {
+ const int id = node.Attr<int>("id");
+ if (id != 225) {
+ ythrow yexception() << "unsupported id: " << id;
+ }
+ };
+
+ UNIT_ASSERT_EXCEPTION(ParseXml(GEODATA, handler, "country"), yexception);
+ UNIT_ASSERT_EXCEPTION(ParseXml("<a></b>", handler, "a"), yexception);
+ UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2\"></b></root>", handler, "a"), yexception);
+ UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2></a></root>", handler, "a"), yexception);
+ }
+
const TString BACKA = // UTF-8 encoding is used implicitly
- "<Companies"
- " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\""
- " xmlns=\"http://maps.yandex.ru/backa/1.x\""
- " xmlns:atom=\"http://www.w3.org/2005/Atom\""
- " xmlns:biz=\"http://maps.yandex.ru/business/1.x\""
- " xmlns:xal=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\""
- " xmlns:gml=\"http://www.opengis.net/gml\""
- ">"
- ""
- " <Company id=\"0001\">"
- " <Geo>"
- " <Location>"
- " <gml:pos>37.62669 55.664827</gml:pos>"
- " <kind>house</kind>"
- " </Location>"
- " <AddressDetails xmlns=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\">"
- " <Country>"
- " <AddressLine xml:lang=\"ru\">Москва, Каширское ш., 14</AddressLine>"
- " </Country>"
- " </AddressDetails>"
- " </Geo>"
- " </Company>"
- ""
- " <Company id=\"0002\">"
- " <Geo>"
- " <Location>"
- " <pos xmlns=\"http://www.opengis.net/gml\">150.819797 59.56092</pos>"
- " <kind>locality</kind>"
- " </Location>"
- " <xal:AddressDetails>"
- " <xal:Country>"
- " <xal:AddressLine xml:lang=\"ru\">Магадан, ул. Пролетарская, 43</xal:AddressLine>"
- " </xal:Country>"
- " </xal:AddressDetails>"
- " </Geo>"
- " </Company>"
- ""
- "</Companies>";
-
+ "<Companies"
+ " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\""
+ " xmlns=\"http://maps.yandex.ru/backa/1.x\""
+ " xmlns:atom=\"http://www.w3.org/2005/Atom\""
+ " xmlns:biz=\"http://maps.yandex.ru/business/1.x\""
+ " xmlns:xal=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\""
+ " xmlns:gml=\"http://www.opengis.net/gml\""
+ ">"
+ ""
+ " <Company id=\"0001\">"
+ " <Geo>"
+ " <Location>"
+ " <gml:pos>37.62669 55.664827</gml:pos>"
+ " <kind>house</kind>"
+ " </Location>"
+ " <AddressDetails xmlns=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\">"
+ " <Country>"
+ " <AddressLine xml:lang=\"ru\">Москва, Каширское ш., 14</AddressLine>"
+ " </Country>"
+ " </AddressDetails>"
+ " </Geo>"
+ " </Company>"
+ ""
+ " <Company id=\"0002\">"
+ " <Geo>"
+ " <Location>"
+ " <pos xmlns=\"http://www.opengis.net/gml\">150.819797 59.56092</pos>"
+ " <kind>locality</kind>"
+ " </Location>"
+ " <xal:AddressDetails>"
+ " <xal:Country>"
+ " <xal:AddressLine xml:lang=\"ru\">Магадан, ул. Пролетарская, 43</xal:AddressLine>"
+ " </xal:Country>"
+ " </xal:AddressDetails>"
+ " </Geo>"
+ " </Company>"
+ ""
+ "</Companies>";
+
Y_UNIT_TEST(NamespaceHell) {
- using TNS = NXml::TNamespaceForXPath;
+ using TNS = NXml::TNamespaceForXPath;
const NXml::TNamespacesForXPath ns = {
- TNS{"b", "http://maps.yandex.ru/backa/1.x"},
- TNS{"gml", "http://www.opengis.net/gml"},
+ TNS{"b", "http://maps.yandex.ru/backa/1.x"},
+ TNS{"gml", "http://www.opengis.net/gml"},
TNS{"xal", "urn:oasis:names:tc:ciq:xsdschema:xAL:2.0"}};
-
- int count = 0;
+
+ int count = 0;
THashMap<TString, TString> positions;
THashMap<TString, TString> addresses;
-
- auto handler = [&](NXml::TConstNode node) {
- count++;
+
+ auto handler = [&](NXml::TConstNode node) {
+ count++;
const auto id = node.Attr<TString>("id");
-
- NXml::TXPathContextPtr ctxt = node.CreateXPathContext(ns);
-
- const NXml::TConstNode location = node.Node("b:Geo/b:Location", false, *ctxt);
+
+ NXml::TXPathContextPtr ctxt = node.CreateXPathContext(ns);
+
+ const NXml::TConstNode location = node.Node("b:Geo/b:Location", false, *ctxt);
positions[id] = location.Node("gml:pos", false, *ctxt).Value<TString>();
addresses[id] = node.Node("b:Geo/xal:AddressDetails/xal:Country/xal:AddressLine", false, *ctxt).Value<TString>();
- };
-
- ParseXml(BACKA, handler, "Company");
- UNIT_ASSERT_EQUAL(count, 0);
- // nothing found because namespace was not specified
-
- ParseXml(BACKA, handler, "Company", "http://maps.yandex.ru/backa/1.x");
-
- UNIT_ASSERT_VALUES_EQUAL(count, 2);
-
- UNIT_ASSERT_VALUES_EQUAL(positions["0001"], "37.62669 55.664827");
- UNIT_ASSERT_VALUES_EQUAL(positions["0002"], "150.819797 59.56092");
-
- UNIT_ASSERT_VALUES_EQUAL(addresses["0001"], "Москва, Каширское ш., 14");
- UNIT_ASSERT_VALUES_EQUAL(addresses["0002"], "Магадан, ул. Пролетарская, 43");
- }
-}
+ };
+
+ ParseXml(BACKA, handler, "Company");
+ UNIT_ASSERT_EQUAL(count, 0);
+ // nothing found because namespace was not specified
+
+ ParseXml(BACKA, handler, "Company", "http://maps.yandex.ru/backa/1.x");
+
+ UNIT_ASSERT_VALUES_EQUAL(count, 2);
+
+ UNIT_ASSERT_VALUES_EQUAL(positions["0001"], "37.62669 55.664827");
+ UNIT_ASSERT_VALUES_EQUAL(positions["0002"], "150.819797 59.56092");
+
+ UNIT_ASSERT_VALUES_EQUAL(addresses["0001"], "Москва, Каширское ш., 14");
+ UNIT_ASSERT_VALUES_EQUAL(addresses["0002"], "Магадан, ул. Пролетарская, 43");
+ }
+}
diff --git a/library/cpp/xml/document/ya.make b/library/cpp/xml/document/ya.make
index 06a0065972..86bbd639cf 100644
--- a/library/cpp/xml/document/ya.make
+++ b/library/cpp/xml/document/ya.make
@@ -4,7 +4,7 @@ OWNER(finder)
SRCS(
xml-document.cpp
- xml-textreader.cpp
+ xml-textreader.cpp
xml-options.cpp
)