diff options
| author | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 | 
|---|---|---|
| committer | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 | 
| commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
| tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/xml/document | |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/xml/document')
| -rw-r--r-- | library/cpp/xml/document/README | 42 | ||||
| -rw-r--r-- | library/cpp/xml/document/libxml-guards.h | 50 | ||||
| -rw-r--r-- | library/cpp/xml/document/node-attr.h | 209 | ||||
| -rw-r--r-- | library/cpp/xml/document/ut/ya.make | 11 | ||||
| -rw-r--r-- | library/cpp/xml/document/xml-document-decl.h | 718 | ||||
| -rw-r--r-- | library/cpp/xml/document/xml-document.cpp | 393 | ||||
| -rw-r--r-- | library/cpp/xml/document/xml-document.h | 4 | ||||
| -rw-r--r-- | library/cpp/xml/document/xml-document_ut.cpp | 319 | ||||
| -rw-r--r-- | library/cpp/xml/document/xml-options.cpp | 1 | ||||
| -rw-r--r-- | library/cpp/xml/document/xml-options.h | 67 | ||||
| -rw-r--r-- | library/cpp/xml/document/xml-options_ut.cpp | 26 | ||||
| -rw-r--r-- | library/cpp/xml/document/xml-textreader.cpp | 318 | ||||
| -rw-r--r-- | library/cpp/xml/document/xml-textreader.h | 325 | ||||
| -rw-r--r-- | library/cpp/xml/document/xml-textreader_ut.cpp | 290 | ||||
| -rw-r--r-- | library/cpp/xml/document/ya.make | 17 | 
15 files changed, 2790 insertions, 0 deletions
| diff --git a/library/cpp/xml/document/README b/library/cpp/xml/document/README new file mode 100644 index 00000000000..b2649523d8f --- /dev/null +++ b/library/cpp/xml/document/README @@ -0,0 +1,42 @@ +A wrapper around the DOM interface of libxml2. + +The standard way to use it is as follows: + +    #include <library/cpp/xml/document/xml-document.h> +    ... + +    // open a document +    NXml::TDocument xml("filename.xml"); + +    // get a nodeset from an XPath query +    NXml::TConstNodes nodes = xml.Root().Nodes("xpath/expression/here"); + +    // iterate over the nodeset +    for (size_t i = 0; i < nodes.size(); ++i) { +        using namespace NXml; +        TConstNode node = nodes[i]; +        // query node +        TString name = node.Name(); +        TString lang = node.Attr<TString>("lang"); +        TString text = node.Value<TString>(); +        TConstNode child = node.FirstChild(""); +        // edit node +        TNode editable = child.ConstCast(); +        editable.DelAttr("id"); +        editable.SetAttr("x", 2); +        editable.SetValue(5); +        editable.AddText(" apples"); +    } + +    // edit documents with copy-paste +    NXml::TDocument xml2("<xpath><node/></xpath>", NXml::TDocument::String); +    NXml::TNode place = xml2.Root().Node("xpath/node"); +    // copy node's subtree from one document to another +    place.AddChild(xml.Root()); +    // save (render) single element +    TString modifiedNode = place.ToString(); +    // save whole document with optional encoding +    TString modifiedDoc  = xml2.ToString("ISO-8859-1"); + + +See xml-document_ut.cpp for more examples. 
diff --git a/library/cpp/xml/document/libxml-guards.h b/library/cpp/xml/document/libxml-guards.h new file mode 100644 index 00000000000..4188cecff12 --- /dev/null +++ b/library/cpp/xml/document/libxml-guards.h @@ -0,0 +1,50 @@ +#pragma once + +#include <library/cpp/xml/init/ptr.h> +#include <util/generic/ptr.h> +#include <libxml/xmlstring.h> +#include <libxml/tree.h> +#include <libxml/xpath.h> +#include <libxml/uri.h> +#include <libxml/xmlsave.h> + +namespace NXml { +    namespace NDetail { +        struct TSignedCharPtrTraits { +            static void Destroy(char* handle) { +                xmlFree(handle); +            } +        }; + +        struct TCharPtrTraits { +            static void Destroy(xmlChar* handle) { +                xmlFree(handle); +            } +        }; + +        struct TOutputBufferPtrTraits { +            static void Destroy(xmlOutputBufferPtr handle) { +                xmlOutputBufferClose(handle); +            } +        }; + +        struct TSaveCtxtPtrTraits { +            static void Destroy(xmlSaveCtxtPtr handle) { +                xmlSaveClose(handle); +            } +        }; + +    } + +    typedef TxmlXPathContextPtr TXPathContextPtr; +    typedef TxmlXPathObjectPtr TXPathObjectPtr; +    typedef TAutoPtr<char, NDetail::TSignedCharPtrTraits> TSignedCharPtr; +    typedef TAutoPtr<xmlChar, NDetail::TCharPtrTraits> TCharPtr; +    typedef TxmlDocHolder TDocHolder; +    typedef TxmlURIPtr TURIPtr; +    typedef TxmlNodePtr TNodePtr; +    typedef TAutoPtr<xmlOutputBuffer, NDetail::TOutputBufferPtrTraits> TOutputBufferPtr; +    typedef TxmlParserCtxtPtr TParserCtxtPtr; +    typedef TAutoPtr<xmlSaveCtxt, NDetail::TSaveCtxtPtrTraits> TSaveCtxtPtr; + +} diff --git a/library/cpp/xml/document/node-attr.h b/library/cpp/xml/document/node-attr.h new file mode 100644 index 00000000000..6e74403943c --- /dev/null +++ b/library/cpp/xml/document/node-attr.h @@ -0,0 +1,209 @@ +#pragma once + +#include "xml-document-decl.h" +#include 
"libxml-guards.h" +#include <util/stream/str.h> +#include <util/string/cast.h> + +namespace NXml { +#define THROW(x, y) ythrow yexception() << #x << ": " << y + +    // libxml defines unsigned char -> xmlChar, +    // and all functions use xmlChar. +    inline static const char* CAST2CHAR(const xmlChar* x) { +        return reinterpret_cast<const char*>(x); +    } +    inline static const xmlChar* XMLCHAR(const char* x) { +        return reinterpret_cast<const xmlChar*>(x); +    } + +    template <class T> +    void TNode::AttrInternal(TCharPtr& value, T& res, TStringBuf errContext) const { +        try { +            res = FromString<T>(CAST2CHAR(value.Get())); +        } catch (TFromStringException&) { +            THROW(XmlException, "Failed to convert string " << TString{TStringBuf(CAST2CHAR(value.Get())).substr(0, 50)}.Quote() << " from '" << errContext << "' to requested type"); +        } +    } + +    template <> +    inline void TNode::AttrInternal(TCharPtr& value, TString& res, TStringBuf /*errContext*/) const { +        TString tmp(CAST2CHAR(value.Get())); +        res.swap(tmp); +    } + +    template <class T> +    T TNode::Attr(TZtStringBuf name) const { +        TCharPtr value(xmlGetProp(NodePointer, XMLCHAR(name.c_str()))); +        if (!value) { +            THROW(AttributeNotFound, Path() << "@" << name); +        } + +        T t; +        AttrInternal(value, t, name); +        return t; +    } + +    template <class T> +    T TNode::Attr(TZtStringBuf name, const T& defvalue) const { +        TCharPtr attr(xmlGetProp(NodePointer, XMLCHAR(name.c_str()))); +        if (!attr) { +            return defvalue; +        } + +        T t; +        AttrInternal(attr, t, name); +        return t; +    } + +    template <class T> +    void TNode::Attr(TZtStringBuf name, T& value) const { +        TCharPtr attr(xmlGetProp(NodePointer, XMLCHAR(name.c_str()))); +        if (!attr) { +            THROW(AttributeNotFound, Path() << name); +        } + +        
AttrInternal(attr, value, name); +    } + +    template <class T> +    void TNode::Attr(TZtStringBuf name, T& value, const T& defvalue) const { +        TCharPtr attr(xmlGetProp(NodePointer, XMLCHAR(name.c_str()))); + +        if (!attr) { +            value = defvalue; +        } else { +            AttrInternal(attr, value, name); +        } +    } + +    template <class T> +    T TNode::Value() const { +        if (!NodePointer || xmlIsBlankNode(NodePointer)) { +            THROW(NodeIsBlank, Path()); +        } + +        TCharPtr val(xmlNodeGetContent(NodePointer)); +        T t; +        AttrInternal(val, t, this->Name()); +        return t; +    } + +    template <class T> +    T TNode::Value(const T& defvalue) const { +        if (!NodePointer || xmlIsBlankNode(NodePointer)) { +            return defvalue; +        } + +        TCharPtr val(xmlNodeGetContent(NodePointer)); +        T t; +        AttrInternal(val, t, this->Name()); +        return t; +    } + +    template <class T> +    typename std::enable_if<!std::is_convertible_v<T, TStringBuf>, void>::type +    TNode::SetValue(const T& value) { +        TStringStream ss; +        ss << value; +        SetValue(ss.Str()); +    } + +    inline void TNode::SetValue(TStringBuf value) { +        xmlNodeSetContent(NodePointer, XMLCHAR("")); +        xmlNodeAddContentLen(NodePointer, XMLCHAR(value.data()), value.Size()); +    } + +    inline void TNode::SetAttr(TZtStringBuf name, TZtStringBuf value) { +        xmlAttr* attr = xmlSetProp(NodePointer, XMLCHAR(name.c_str()), XMLCHAR(value.c_str())); + +        if (!attr) { +            THROW(XmlException, "Can't set node attribute <" +                                    << name +                                    << "> to <" +                                    << value +                                    << ">"); +        } +    } + +    template <class T> +    typename std::enable_if<!std::is_convertible_v<T, TZtStringBuf>, void>::type +    
TNode::SetAttr(TZtStringBuf name, const T& value) { +        TStringStream ss; +        ss << value; +        SetAttr(name, TZtStringBuf(ss.Str())); +    } + +    inline void TNode::SetAttr(TZtStringBuf name) { +        xmlAttr* attr = xmlSetProp(NodePointer, XMLCHAR(name.c_str()), nullptr); + +        if (!attr) { +            THROW(XmlException, "Can't set node empty attribute <" +                                    << name +                                    << ">"); +        } +    } + +    inline void TNode::DelAttr(TZtStringBuf name) { +        if (xmlUnsetProp(NodePointer, XMLCHAR(name.c_str())) < 0) +            THROW(XmlException, "Can't delete node attribute <" +                                    << name +                                    << ">"); +    } + +    template <class T> +    typename std::enable_if<!std::is_convertible_v<T, TZtStringBuf>, TNode>::type +    TNode::AddChild(TZtStringBuf name, const T& value) { +        TStringStream ss; +        ss << value; +        return AddChild(name, TZtStringBuf(ss.Str())); +    } + +    inline TNode TNode::AddChild(TZtStringBuf name, TZtStringBuf value) { +        if (IsNull()) { +            THROW(XmlException, "addChild [name=" << name << ", value=" << value +                                                  << "]: can't add child to null node"); +        } + +        xmlNode* child = nullptr; + +        if (value.empty()) { +            child = xmlNewTextChild(NodePointer, nullptr, XMLCHAR(name.c_str()), nullptr); +        } else { +            child = xmlNewTextChild( +                NodePointer, nullptr, XMLCHAR(name.c_str()), XMLCHAR(value.c_str())); +        } + +        if (!child) { +            THROW(XmlException, "addChild [name=" << name << ", value=" << value +                                                  << "]: xmlNewTextChild returned NULL"); +        } + +        return TNode(DocPointer, child); +    } + +    template <class T> +    typename std::enable_if<!std::is_convertible_v<T, 
TStringBuf>, TNode>::type +    TNode::AddText(const T& value) { +        TStringStream ss; +        ss << value; +        return AddText(ss.Str()); +    } + +    inline TNode TNode::AddText(TStringBuf value) { +        if (IsNull()) { +            THROW(XmlException, "addChild [value=" << value +                                                   << "]: can't add child to null node"); +        } + +        xmlNode* child = xmlNewTextLen((xmlChar*)value.data(), value.size()); +        child = xmlAddChild(NodePointer, child); + +        if (!child) { +            THROW(XmlException, "addChild [value=" << value +                                                   << "]: xmlNewTextChild returned NULL"); +        } + +        return TNode(DocPointer, child); +    } +} diff --git a/library/cpp/xml/document/ut/ya.make b/library/cpp/xml/document/ut/ya.make new file mode 100644 index 00000000000..e955448c66a --- /dev/null +++ b/library/cpp/xml/document/ut/ya.make @@ -0,0 +1,11 @@ +UNITTEST_FOR(library/cpp/xml/document) + +OWNER(finder) + +SRCS( +    xml-document_ut.cpp +    xml-textreader_ut.cpp +    xml-options_ut.cpp +) + +END() diff --git a/library/cpp/xml/document/xml-document-decl.h b/library/cpp/xml/document/xml-document-decl.h new file mode 100644 index 00000000000..bfda1fb7e6e --- /dev/null +++ b/library/cpp/xml/document/xml-document-decl.h @@ -0,0 +1,718 @@ +#pragma once + +#include <library/cpp/string_utils/ztstrbuf/ztstrbuf.h> + +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/stream/output.h> +#include <util/stream/str.h> +#include <algorithm> +#include "libxml-guards.h" + +namespace NXml { +    class TNode; + +    class TConstNodes; +    class TConstNode; + +    using TXPathContext = xmlXPathContext; + +    class TDocument { +    public: +        enum Source { +            File, +            String, +            RootName, +        }; + +    public: +        /** +        * create TDocument +        * @param source: filename, XML 
string, or name for the root element (depends on @src) +        * @param src: source type: File | String | RootName +        * throws if file not found or cannot be parsed +        */ +        TDocument(const TString& source, Source type = File); + +    public: +        TDocument(const TDocument& that) = delete; +        TDocument& operator=(const TDocument& that) = delete; + +        TDocument(TDocument&& that); +        TDocument& operator=(TDocument&& that); + +        /** +        * get root element +        */ +        TNode Root(); +        TConstNode Root() const; + +        void Save(IOutputStream& stream, TZtStringBuf enc = "", bool shouldFormat = true) const { +            int bufferSize = 0; +            xmlChar* xmlBuff = nullptr; +            const char* encoding = enc.size() ? enc.data() : Doc->encoding ? nullptr : "UTF-8"; +            xmlDocDumpFormatMemoryEnc(Doc.Get(), &xmlBuff, &bufferSize, encoding, shouldFormat); +            TCharPtr xmlCharBuffPtr(xmlBuff); +            stream.Write(xmlBuff, bufferSize); +        } + +        TString ToString(TZtStringBuf enc = "", bool shouldFormat = true) const { +            TStringStream s; +            Save(s, enc, shouldFormat); +            return s.Str(); +        } + +        void Swap(TDocument& that) { +            std::swap(this->Doc, that.Doc); +        } + +        xmlDocPtr GetImpl() { +            return Doc.Get(); +        } + +    private: +        void ParseFile(const TString& file); +        void ParseString(TZtStringBuf xml); + +        TDocument(TDocHolder doc) +            : Doc(std::move(doc)) +        { +        } + +        TDocHolder Doc; +    }; + +    struct TNamespaceForXPath { +        TString Prefix; +        TString Url; +    }; +    typedef TVector<TNamespaceForXPath> TNamespacesForXPath; + +    class TConstNodes { +    private: +        struct TConstNodesRef { +            explicit TConstNodesRef(TConstNodes& n) +                : r_(n) +            { +            } +        
    TConstNodes& r_; +        }; + +    public: +        TConstNodes(const TConstNodes& nodes); +        TConstNodes& operator=(const TConstNodes& nodes); + +        TConstNodes(TConstNodesRef ref); +        TConstNodes& operator=(TConstNodesRef ref); + +        operator TConstNodesRef(); + +        /** +        * get node by id +        * @param number: node id +        */ +        TConstNode operator[](size_t number) const; + +        /** +        * get number of nodes +        */ +        size_t Size() const { +            return SizeValue; +        } +        size_t size() const { +            return SizeValue; +        } + +        struct TNodeIter { +            const TConstNodes& Nodes; +            size_t Index; +            TConstNode operator*() const; +            bool operator==(const TNodeIter& other) const { +                return Index == other.Index; +            } +            bool operator!=(const TNodeIter& other) const { +                return !(*this == other); +            } +            TNodeIter operator++() { +                Index++; +                return *this; +            } +        }; +        TNodeIter begin() const { +            return TNodeIter{*this, 0}; +        } +        TNodeIter end() const { +            return TNodeIter{*this, size()}; +        } + +    private: +        friend class TDocument; +        friend class TConstNode; +        friend class TNode; + +        TConstNodes(xmlDoc* doc, TXPathObjectPtr obj); + +        size_t SizeValue; +        xmlDoc* Doc; +        TXPathObjectPtr Obj; +    }; + +    class TNode { +    public: +        friend class TDocument; +        friend class TConstNode; +        friend class TTextReader; + +        /** +        * check if node is null +        */ +        bool IsNull() const; + +        /** +        * check if node is element node +        */ +        bool IsElementNode() const; + +        /** +        * Create xpath context to be used later for fast xpath evaluation. 
+        * @param nss: explicitly specify XML namespaces to use and their prefixes +        * +        * For better performance, when you need to evaluate several xpath expressions, +        * it makes sense to create a context, load namespace prefixes once +        * and use the context several times in Node(), Nodes(), XPath() function calls for several nodes. +        * The context may be used with any node of the current document, but +        * cannot be shared between different XML documents. +        */ +        TXPathContextPtr CreateXPathContext(const TNamespacesForXPath& nss = TNamespacesForXPath()) const; + +        /** +        * get all element nodes matching given xpath expression +        * @param xpath: xpath expression +        * @param quiet: don't throw exception if zero nodes found +        * @param ns: explicitly specify XML namespaces to use and their prefixes +        * +        * For historical reasons, this only works for *element* nodes. +        * Use the XPath function if you need other kinds of nodes. +        */ +        TConstNodes Nodes(TZtStringBuf xpath, bool quiet = false, const TNamespacesForXPath& ns = TNamespacesForXPath()) const; + +        /** +        * get all element nodes matching given xpath expression +        * @param xpath: xpath expression +        * @param quiet: don't throw exception if zero nodes found +        * @param ctxt: reusable xpath context +        * +        * For historical reasons, this only works for *element* nodes. +        * Use the XPath function if you need other kinds of nodes. 
+        */ +        TConstNodes Nodes(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const; + +        /** +        * get all nodes matching given xpath expression +        * @param xpath: xpath expression +        * @param quiet: don't throw exception if zero nodes found +        * @param ns: explicitly specify XML namespaces to use and their prefixes +        */ +        TConstNodes XPath(TZtStringBuf xpath, bool quiet = false, const TNamespacesForXPath& ns = TNamespacesForXPath()) const; + +        /** +        * get all nodes matching given xpath expression +        * @param xpath: xpath expression +        * @param quiet: don't throw exception if zero nodes found +        * @param ctxt: reusable xpath context +        */ +        TConstNodes XPath(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const; + +        /** +        * get the first element node matching given xpath expression +        * @param xpath: path to node (from current node) +        * @param quiet: don't throw exception if node not found, +        *               return null node (@see IsNull()) +        * @param ns: explicitly specify XML namespaces to use and their prefixes +        * +        * For historical reasons, this only works for *element* nodes. +        * Use the XPath function if you need other kinds of nodes. 
+        */ +        /// @todo: quiet should be default, empty nodeset is not an error +        TNode Node(TZtStringBuf xpath, bool quiet = false, const TNamespacesForXPath& ns = TNamespacesForXPath()); +        TConstNode Node(TZtStringBuf xpath, bool quiet = false, const TNamespacesForXPath& ns = TNamespacesForXPath()) const; + +        /** +        * get the first element node matching given xpath expression +        * @param xpath: path to node (from current node) +        * @param quiet: don't throw exception if node not found, +        *               return null node (@see IsNull()) +        * @param ctxt: reusable xpath context +        * +        * For historical reasons, this only works for *element* nodes. +        * Use the XPath function if you need other kinds of nodes. +        */ +        TNode Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt); +        TConstNode Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const; + +        /** +        * get node first child +        * @param name: child name +        * @note if name is empty, returns the first child node of type "element" +        * @note returns null node if no child found +        */ +        TNode FirstChild(TZtStringBuf name); +        TConstNode FirstChild(TZtStringBuf name) const; + +        TNode FirstChild(); +        TConstNode FirstChild() const; + +        /** +        * get parent node +        * throws exception if has no parent +        */ +        TNode Parent(); +        TConstNode Parent() const; + +        /** +        * get node neighbour +        * @param name: neighbour name +        * @note if name is empty, returns the next sibling node of type "element" +        * @node returns null node if no neighbour found +        */ +        TNode NextSibling(TZtStringBuf name); +        TConstNode NextSibling(TZtStringBuf name) const; + +        TNode NextSibling(); +        TConstNode NextSibling() const; + +        /** +        * create child node +        * 
@param name: child name +        * returns new empty node +        */ +        TNode AddChild(TZtStringBuf name); + +        /** +        * create child node with given value +        * @param name: child name +        * @param value: node value +        */ +        template <class T> +        typename std::enable_if<!std::is_convertible_v<T, TZtStringBuf>, TNode>::type +        AddChild(TZtStringBuf name, const T& value); + +        TNode AddChild(TZtStringBuf name, TZtStringBuf value); + +        /** +        * add child node, making recursive copy of original +        * @param node: node to copy from +        * returns added node +        */ +        TNode AddChild(const TConstNode& node); + +        /** +        * create text child node +        * @param name: child name +        * @param value: node value +        */ +        template <class T> +        typename std::enable_if<!std::is_convertible_v<T, TStringBuf>, TNode>::type +        AddText(const T& value); + +        TNode AddText(TStringBuf value); + +        /** +        * get node attribute +        * @param name: attribute name +        * throws exception if attribute not found +        */ +        template <class T> +        T Attr(TZtStringBuf name) const; + +        /** +        * get node attribute +        * @param name: attribute name +        * returns default value if attribute not found +        */ +        template <class T> +        T Attr(TZtStringBuf name, const T& defvalue) const; + +        /** +        * get node attribute +        * @param name: attribute name +        * @param value: return-value +        * throws exception if attribute not found +        */ +        template <class T> +        void Attr(TZtStringBuf name, T& value) const; + +        /** +        * get node attribute +        * @param name: attribute name +        * @param defvalue: default value +        * @param value: return-value +        * returns default value if attribute not found, attr value otherwise +      
  */ +        template <class T> +        void Attr(TZtStringBuf name, T& value, const T& defvalue) const; + +        /** +        * get node value (text) +        * @throws exception if node is blank +        */ +        template <class T> +        T Value() const; + +        /** +        * get node value +        * @param defvalue: default value +        * returns default value if node is blank +        */ +        template <class T> +        T Value(const T& defvalue) const; + +        /** +        * set node value +        * @param value: new text value +        */ +        template <class T> +        typename std::enable_if<!std::is_convertible_v<T, TStringBuf>, void>::type +        SetValue(const T& value); + +        void SetValue(TStringBuf value); + +        /** +        * set/reset node attribute value, +        * if attribute does not exist, it'll be created +        * @param name: attribute name +        * @param value: attribute value +        */ +        template<class T> +        typename std::enable_if<!std::is_convertible_v<T, TZtStringBuf>, void>::type +        SetAttr(TZtStringBuf name, const T& value); + +        void SetAttr(TZtStringBuf name, TZtStringBuf value); + +        void SetAttr(TZtStringBuf name); + +        /** +        * delete node attribute +        * @param name: attribute name +        */ +        void DelAttr(TZtStringBuf name); + +        /** +        * set node application data +        * @param priv: new application data pointer +        */ +        void SetPrivate(void* priv); + +        /** +        * @return application data pointer, passed by SetPrivate +        */ +        void* GetPrivate() const; + +        /** +        * get node name +        */ +        TString Name() const; + +        /** +        * get node xpath +        */ +        TString Path() const; + +        /** +        * get node xml representation +        */ +        TString ToString(TZtStringBuf enc = "") const { +            TStringStream s; +       
     Save(s, enc); +            return s.Str(); +        } +        void Save(IOutputStream& stream, TZtStringBuf enc = "", bool shouldFormat = false) const; +        void SaveAsHtml(IOutputStream& stream, TZtStringBuf enc = "", bool shouldFormat = false) const; + +        /** +        * get pointer to internal node +        */ +        xmlNode* GetPtr(); +        const xmlNode* GetPtr() const; + +        /** +        * check if node is text-only node +        */ +        bool IsText() const; + +        /** +        * unlink node from parent and free +        */ +        void Remove(); + +        /** +        * constructs null node +        */ +        TNode() +            : NodePointer(nullptr) +            , DocPointer(nullptr) +        { +        } + +    private: +        friend class TConstNodes; + +        TNode(xmlDoc* doc, xmlNode* node) +            : NodePointer(node) +            , DocPointer(doc) +        { +        } + +        TNode Find(xmlNode* start, TZtStringBuf name); + +        template <class T> +        void AttrInternal(TCharPtr& value, T& res, TStringBuf errContext) const; + +        void SaveInternal(IOutputStream& stream, TZtStringBuf enc, int options) const; + +        xmlNode* NodePointer; +        xmlDoc* DocPointer; +    }; + +    class TConstNode { +    public: +        friend class TDocument; +        friend class TConstNodes; +        friend class TNode; +        /** +        * check if node is null +        */ +        bool IsNull() const { +            return ActualNode.IsNull(); +        } + +        bool IsElementNode() const { +            return ActualNode.IsElementNode(); +        } + +        TConstNode Parent() const { +            return ActualNode.Parent(); +        } + +        /** +        * Create xpath context to be used later for fast xpath evaluation. 
+        * @param nss: explicitly specify XML namespaces to use and their prefixes +        */ +        TXPathContextPtr CreateXPathContext(const TNamespacesForXPath& nss = TNamespacesForXPath()) const { +            return ActualNode.CreateXPathContext(nss); +        } + +        /** +        * get all element nodes matching given xpath expression +        * @param xpath: xpath expression +        * @param quiet: don't throw exception if zero nodes found +        * @param ns: explicitly specify XML namespaces to use and their prefixes +        * +        * For historical reasons, this only works for *element* nodes. +        * Use the XPath function if you need other kinds of nodes. +        */ +        TConstNodes Nodes(TZtStringBuf xpath, bool quiet = false, const TNamespacesForXPath& ns = TNamespacesForXPath()) const { +            return ActualNode.Nodes(xpath, quiet, ns); +        } + +        /** +        * get all element nodes matching given xpath expression +        * @param xpath: xpath expression +        * @param quiet: don't throw exception if zero nodes found +        * @param ctxt: reusable xpath context +        * +        * For historical reasons, this only works for *element* nodes. +        * Use the XPath function if you need other kinds of nodes. 
+        */ +        TConstNodes Nodes(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const { +            return ActualNode.Nodes(xpath, quiet, ctxt); +        } + +        /** +        * get all nodes matching given xpath expression +        * @param xpath: xpath expression +        * @param quiet: don't throw exception if zero nodes found +        * @param ns: explicitly specify XML namespaces to use and their prefixes +        */ +        TConstNodes XPath(TZtStringBuf xpath, bool quiet = false, const TNamespacesForXPath& ns = TNamespacesForXPath()) const { +            return ActualNode.XPath(xpath, quiet, ns); +        } + +        /** +        * get all nodes matching given xpath expression +        * @param xpath: xpath expression +        * @param quiet: don't throw exception if zero nodes found +        * @param ctxt: reusable xpath context +        */ +        TConstNodes XPath(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const { +            return ActualNode.XPath(xpath, quiet, ctxt); +        } + +        /** +        * get the first element node matching given xpath expression +        * @param xpath: path to node (from current node) +        * @param quiet: don't throw exception if node not found, +        *               return null node (@see IsNull()) +        * @param ns: explicitly specify XML namespaces to use and their prefixes +        * +        * For historical reasons, this only works for *element* nodes. +        * Use the XPath function if you need other kinds of nodes. 
+        */ +        TConstNode Node(TZtStringBuf xpath, bool quiet = false, const TNamespacesForXPath& ns = TNamespacesForXPath()) const { +            return ActualNode.Node(xpath, quiet, ns); +        } + +        /** +        * get the first element node matching given xpath expression +        * @param xpath: path to node (from current node) +        * @param quiet: don't throw exception if node not found, +        *               return null node (@see IsNull()) +        * @param ctxt: reusable xpath context +        * +        * For historical reasons, this only works for *element* nodes. +        * Use the XPath function if you need other kinds of nodes. +        */ +        TConstNode Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const { +            return ActualNode.Node(xpath, quiet, ctxt); +        } + +        TConstNode FirstChild(TZtStringBuf name) const { +            return ActualNode.FirstChild(name); +        } + +        TConstNode FirstChild() const { +            return ActualNode.FirstChild(); +        } + +        /** +        * get node neighbour +        * @param name: neighbour name +        * throws exception if no neighbour found +        */ +        TConstNode NextSibling(TZtStringBuf name) const { +            return ActualNode.NextSibling(name); +        } + +        TConstNode NextSibling() const { +            return ActualNode.NextSibling(); +        } + +        /** +        * get node attribute +        * @param name: attribute name +        * throws exception if attribute not found +        */ +        template <class T> +        T Attr(TZtStringBuf name) const { +            return ActualNode.Attr<T>(name); +        } + +        /** +        * get node attribute +        * @param name: attribute name +        * returns default value if attribute not found +        */ +        template <class T> +        T Attr(TZtStringBuf name, const T& defvalue) const { +            return ActualNode.Attr(name, defvalue); +      
  } + +        /** +        * get node attribute +        * @param name: attribute name +        * @param value: return-value +        * throws exception if attribute not found +        */ +        template <class T> +        void Attr(TZtStringBuf name, T& value) const { +            return ActualNode.Attr(name, value); +        } + +        /** +        * get node attribute +        * @param name: attribute name +        * @param defvalue: default value +        * @param value: return-value +        * returns default value if attribute not found, attr value otherwise +        */ +        template <class T> +        void Attr(TZtStringBuf name, T& value, const T& defvalue) const { +            return ActualNode.Attr(name, value, defvalue); +        } + +        /** +        * get node value (text) +        * @throws exception if node is blank +        */ +        template <class T> +        T Value() const { +            return ActualNode.Value<T>(); +        } + +        /** +        * get node value +        * @param defvalue: default value +        * returns default value if node is blank +        */ +        template <class T> +        T Value(const T& defvalue) const { +            return ActualNode.Value(defvalue); +        } + +        /** +        * get node name +        */ +        TString Name() const { +            return ActualNode.Name(); +        } + +        /** +        * @return application data pointer, passed by SetPrivate +        */ +        void* GetPrivate() const { +            return ActualNode.GetPrivate(); +        } + +        /** +        * get pointer to internal node +        */ +        const xmlNode* GetPtr() const { +            return ActualNode.GetPtr(); +        } + +        /** +        * check if node is text-only node +        */ +        bool IsText() const { +            return ActualNode.IsText(); +        } + +        /** +        * get node xpath +        */ +        TString Path() const { +            return 
ActualNode.Path(); +        } + +        /** +        * get node xml representation +        */ +        TString ToString(TZtStringBuf enc = "") const { +            return ActualNode.ToString(enc); +        } + +        TConstNode() = default; +        TConstNode(TNode node) +            : ActualNode(node) +        { +        } + +        TNode ConstCast() const { +            return ActualNode; +        } + +    private: +        TNode ActualNode; +    }; + +} diff --git a/library/cpp/xml/document/xml-document.cpp b/library/cpp/xml/document/xml-document.cpp new file mode 100644 index 00000000000..18a554d7321 --- /dev/null +++ b/library/cpp/xml/document/xml-document.cpp @@ -0,0 +1,393 @@ +#include "xml-document.h" + +#include <libxml/xinclude.h> +#include <libxml/xpathInternals.h> + +#include <library/cpp/xml/init/init.h> + +#include <util/generic/yexception.h> +#include <util/folder/dirut.h> + +namespace { +    struct TInit { +        inline TInit() { +            NXml::InitEngine(); +        } +    } initer; +} + +namespace NXml { +    TDocument::TDocument(const TString& xml, Source type) { +        switch (type) { +            case File: +                ParseFile(xml); +                break; +            case String: +                ParseString(xml); +                break; +            case RootName: { +                TDocHolder doc(xmlNewDoc(XMLCHAR("1.0"))); +                if (!doc) +                    THROW(XmlException, "Can't create xml document."); +                doc->encoding = xmlStrdup(XMLCHAR("utf-8")); + +                TNodePtr node(xmlNewNode(nullptr, XMLCHAR(xml.c_str()))); +                if (!node) +                    THROW(XmlException, "Can't create root node."); +                xmlDocSetRootElement(doc.Get(), node.Get()); +                Y_UNUSED(node.Release()); +                Doc = std::move(doc); +            } break; +            default: +                THROW(InvalidArgument, "Wrong source type"); +        } +    } + +  
  TDocument::TDocument(TDocument&& doc) +        : Doc(std::move(doc.Doc)) +    { +    } + +    TDocument& TDocument::operator=(TDocument&& doc) { +        if (this != &doc) +            doc.Swap(*this); + +        return *this; +    } + +    void TDocument::ParseFile(const TString& file) { +        if (!NFs::Exists(file)) +            THROW(XmlException, "File " << file << " doesn't exist"); + +        TParserCtxtPtr pctx(xmlNewParserCtxt()); +        if (!pctx) +            THROW(XmlException, "Can't create parser context"); + +        TDocHolder doc(xmlCtxtReadFile(pctx.Get(), file.c_str(), nullptr, XML_PARSE_NOCDATA)); +        if (!doc) +            THROW(XmlException, "Can't parse file " << file); + +        int res = xmlXIncludeProcessFlags(doc.Get(), XML_PARSE_XINCLUDE | XML_PARSE_NOCDATA | XML_PARSE_NOXINCNODE); + +        if (res == -1) +            THROW(XmlException, "XIncludes processing failed"); + +        Doc = std::move(doc); +    } + +    void TDocument::ParseString(TZtStringBuf xml) { +        TParserCtxtPtr pctx(xmlNewParserCtxt()); +        if (pctx.Get() == nullptr) +            THROW(XmlException, "Can't create parser context"); + +        TDocHolder doc(xmlCtxtReadMemory(pctx.Get(), xml.c_str(), (int)xml.size(), nullptr, nullptr, XML_PARSE_NOCDATA)); + +        if (!doc) +            THROW(XmlException, "Can't parse string"); + +        Doc = std::move(doc); +    } + +    TNode TDocument::Root() { +        xmlNode* r = xmlDocGetRootElement(Doc.Get()); +        if (r == nullptr) +            THROW(XmlException, "TDocument hasn't root element"); + +        return TNode(Doc.Get(), r); +    } + +    TConstNode TDocument::Root() const { +        xmlNode* r = xmlDocGetRootElement(Doc.Get()); +        if (r == nullptr) +            THROW(XmlException, "TDocument hasn't root element"); + +        return TConstNode(TNode(Doc.Get(), r)); +    } + +    bool TNode::IsNull() const { +        return NodePointer == nullptr; +    } + +    bool 
TNode::IsElementNode() const { +        return !IsNull() && (NodePointer->type == XML_ELEMENT_NODE); +    } + +    TXPathContextPtr TNode::CreateXPathContext(const TNamespacesForXPath& nss) const { +        TXPathContextPtr ctx = xmlXPathNewContext(DocPointer); +        if (!ctx) +            THROW(XmlException, "Can't create empty xpath context"); + +        for (const auto& ns : nss) { +            const int r = xmlXPathRegisterNs(ctx.Get(), XMLCHAR(ns.Prefix.c_str()), XMLCHAR(ns.Url.c_str())); +            if (r != 0) +                THROW(XmlException, "Can't register namespace " << ns.Url << " with prefix " << ns.Prefix); +        } + +        return ctx; +    } + +    TConstNodes TNode::XPath(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) const { +        TXPathContextPtr ctxt = CreateXPathContext(ns); +        return XPath(xpath, quiet, *ctxt); +    } + +    TConstNodes TNode::XPath(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const { +        if (xmlXPathSetContextNode(NodePointer, &ctxt) != 0) +            THROW(XmlException, "Can't set xpath context node, probably the context is associated with another document"); + +        TXPathObjectPtr obj = xmlXPathEvalExpression(XMLCHAR(xpath.c_str()), &ctxt); +        if (!obj) +            THROW(XmlException, "Can't evaluate xpath expression " << xpath); + +        TConstNodes nodes(DocPointer, obj); + +        if (nodes.Size() == 0 && !quiet) +            THROW(NodeNotFound, xpath); + +        return nodes; +    } + +    TConstNodes TNode::Nodes(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) const { +        TXPathContextPtr ctxt = CreateXPathContext(ns); +        return Nodes(xpath, quiet, *ctxt); +    } + +    TConstNodes TNode::Nodes(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const { +        TConstNodes nodes = XPath(xpath, quiet, ctxt); +        if (nodes.Size() != 0 && !nodes[0].IsElementNode()) +            THROW(XmlException, "xpath points to non-element 
nodes: " << xpath); +        return nodes; +    } + +    TNode TNode::Node(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) { +        TXPathContextPtr ctxt = CreateXPathContext(ns); +        return Node(xpath, quiet, *ctxt); +    } + +    TConstNode TNode::Node(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) const { +        TXPathContextPtr ctxt = CreateXPathContext(ns); +        return Node(xpath, quiet, *ctxt); +    } + +    TNode TNode::Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) { +        TConstNodes n = Nodes(xpath, quiet, ctxt); + +        if (n.Size() == 0 && !quiet) +            THROW(NodeNotFound, xpath); + +        if (n.Size() == 0) +            return TNode(); +        else +            return n[0].ConstCast(); +    } + +    TConstNode TNode::Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const { +        return const_cast<TNode*>(this)->Node(xpath, quiet, ctxt); +    } + +    TNode TNode::FirstChild(TZtStringBuf name) { +        if (IsNull()) +            THROW(XmlException, "Node is null"); + +        return Find(NodePointer->children, name); +    } + +    TConstNode TNode::FirstChild(TZtStringBuf name) const { +        return const_cast<TNode*>(this)->FirstChild(name); +    } + +    TNode TNode::FirstChild() { +        if (IsNull()) +            THROW(XmlException, "Node is null"); + +        return TNode(DocPointer, NodePointer->children); +    } + +    TConstNode TNode::FirstChild() const { +        return const_cast<TNode*>(this)->FirstChild(); +    } + +    /* NOTE: throws XmlException if this node is null or has no parent */ + +    TNode TNode::Parent() { +        if (IsNull()) +            THROW(XmlException, "Node is null"); + +        if (nullptr == NodePointer->parent) +            THROW(XmlException, "Parent node not exists"); + +        return TNode(DocPointer, NodePointer->parent); +    } + +    TConstNode TNode::Parent() const { +        return const_cast<TNode*>(this)->Parent(); +    } + +    TNode TNode::NextSibling(TZtStringBuf name) { +        if (IsNull()) +            THROW(XmlException, "Node is null"); + +        return 
Find(NodePointer->next, name); +    } + +    TConstNode TNode::NextSibling(TZtStringBuf name) const { +        return const_cast<TNode*>(this)->NextSibling(name); +    } + +    TNode TNode::NextSibling() { +        if (IsNull()) +            THROW(XmlException, "Node is null"); + +        return TNode(DocPointer, NodePointer->next); +    } + +    TConstNode TNode::NextSibling() const { +        return const_cast<TNode*>(this)->NextSibling(); +    } + +    /* NOTE: by default the child will inherit its parent's ns */ + +    TNode TNode::AddChild(TZtStringBuf name) { +        return AddChild(name, ""); +    } + +    /* NOTE: the source node is copied into this document, as otherwise it would be double-freed: once from this document and once from its own */ + +    TNode TNode::AddChild(const TConstNode& node) { +        xmlNodePtr copy = xmlDocCopyNode(node.ConstCast().NodePointer, DocPointer, 1 /* recursive: copy the whole subtree */); +        copy = xmlAddChild(NodePointer, copy); +        return TNode(DocPointer, copy); +    } + +    void TNode::SetPrivate(void* priv) { +        NodePointer->_private = priv; +    } + +    void* TNode::GetPrivate() const { +        return NodePointer->_private; +    } + +    TNode TNode::Find(xmlNode* start, TZtStringBuf name) { +        for (; start; start = start->next) +            if (start->type == XML_ELEMENT_NODE && (name.empty() || !xmlStrcmp(start->name, XMLCHAR(name.c_str())))) +                return TNode(DocPointer, start); + +        return TNode(); +    } + +    TString TNode::Name() const { +        if (IsNull()) +            THROW(XmlException, "Node is null"); + +        return CAST2CHAR(NodePointer->name); +    } + +    TString TNode::Path() const { +        TCharPtr path(xmlGetNodePath(NodePointer)); +        if (!!path) +            return CAST2CHAR(path.Get()); +        else +            return ""; +    } + +    xmlNode* TNode::GetPtr() { +        return NodePointer; +    } + +    const xmlNode* TNode::GetPtr() const { +        return NodePointer; +    } + +    bool 
TNode::IsText() const { +        if (IsNull()) +            THROW(XmlException, "Node is null"); + +        return NodePointer->type == XML_TEXT_NODE; +    } + +    void TNode::Remove() { +        xmlNode* nodePtr = GetPtr(); +        xmlUnlinkNode(nodePtr); +        xmlFreeNode(nodePtr); +    } + +    static int XmlWriteToOstream(void* context, const char* buffer, int len) { +        // possibly use to save doc as well +        IOutputStream* out = (IOutputStream*)context; +        out->Write(buffer, len); +        return len; +    } + +    void TNode::SaveInternal(IOutputStream& stream, TZtStringBuf enc, int options) const { +        const char* encoding = enc.size() ? enc.data() : "utf-8"; +        TSaveCtxtPtr ctx(xmlSaveToIO(XmlWriteToOstream, /* close */ nullptr, &stream, +                                     encoding, options)); +        if (xmlSaveTree(ctx.Get(), (xmlNode*)GetPtr()) < 0) +            THROW(XmlException, "Failed saving node to stream"); +    } + +    void TNode::Save(IOutputStream& stream, TZtStringBuf enc, bool shouldFormat) const { +        SaveInternal(stream, enc, shouldFormat ? XML_SAVE_FORMAT : 0); +    } + +    void TNode::SaveAsHtml(IOutputStream& stream, TZtStringBuf enc, bool shouldFormat) const { +        int options = XML_SAVE_AS_HTML; +        options |= shouldFormat ? 
XML_SAVE_FORMAT : 0; +        SaveInternal(stream, enc, options); +    } + +    TConstNodes::TConstNodes(const TConstNodes& nodes) +        : SizeValue(nodes.Size()) +        , Doc(nodes.Doc) +        , Obj(nodes.Obj) +    { +    } + +    TConstNodes& TConstNodes::operator=(const TConstNodes& nodes) { +        if (this != &nodes) { +            SizeValue = nodes.Size(); +            Doc = nodes.Doc; +            Obj = nodes.Obj; +        } + +        return *this; +    } + +    TConstNodes::TConstNodes(TConstNodesRef ref) +        : SizeValue(ref.r_.Size()) +        , Doc(ref.r_.Doc) +        , Obj(ref.r_.Obj) +    { +    } + +    TConstNodes& TConstNodes::operator=(TConstNodesRef ref) { +        if (this != &ref.r_) { +            SizeValue = ref.r_.Size(); +            Doc = ref.r_.Doc; +            Obj = ref.r_.Obj; +        } +        return *this; +    } + +    TConstNodes::operator TConstNodesRef() { +        return TConstNodesRef(*this); +    } + +    TConstNodes::TConstNodes(xmlDoc* doc, TXPathObjectPtr obj) +        : SizeValue(obj && obj->nodesetval ? 
obj->nodesetval->nodeNr : 0) +        , Doc(doc) +        , Obj(obj) +    { +    } + +    TConstNode TConstNodes::operator[](size_t number) const { +        /* compare as "number >= Size()": the former "number + 1" wraps to 0 for the largest size_t and would let that index through */ +        if (number >= Size()) +            THROW(XmlException, "index out of range " << number); + +        if (!Obj || !Obj->nodesetval) +            THROW(XmlException, "Broken TConstNodes object, Obj is null"); + +        xmlNode* node = Obj->nodesetval->nodeTab[number]; +        return TNode(Doc, node); +    } + +    TConstNode TConstNodes::TNodeIter::operator*() const { +        return Nodes[Index]; +    } + +} diff --git a/library/cpp/xml/document/xml-document.h b/library/cpp/xml/document/xml-document.h new file mode 100644 index 00000000000..829ba09cc48 --- /dev/null +++ b/library/cpp/xml/document/xml-document.h @@ -0,0 +1,4 @@ +#pragma once + +#include "xml-document-decl.h" +#include "node-attr.h" diff --git a/library/cpp/xml/document/xml-document_ut.cpp b/library/cpp/xml/document/xml-document_ut.cpp new file mode 100644 index 00000000000..9f537b75c4c --- /dev/null +++ b/library/cpp/xml/document/xml-document_ut.cpp @@ -0,0 +1,319 @@ +#include <library/cpp/testing/unittest/registar.h> +#include <util/generic/map.h> + +#include "xml-document.h" + +Y_UNIT_TEST_SUITE(TestXmlDocument) { +    Y_UNIT_TEST(Iteration) { +        NXml::TDocument xml( +            "<?xml version=\"1.0\"?>\n" +            "<root>qq<a><b></b></a>ww<c></c></root>", +            NXml::TDocument::String); + +        NXml::TConstNode root = xml.Root(); +        UNIT_ASSERT_EQUAL(root.Name(), "root"); +        NXml::TConstNode n = root.FirstChild().NextSibling(); +        UNIT_ASSERT_EQUAL(n.Name(), "a"); +        n = n.NextSibling().NextSibling(); +        UNIT_ASSERT_EQUAL(n.Name(), "c"); +    } + +    Y_UNIT_TEST(ParseString) { +        NXml::TDocument xml( +            "<?xml version=\"1.0\"?>\n" +            "<root>\n" +            "<a><b len=\"15\" correct=\"1\">hello world</b></a>\n" +            "<text>Некоторый текст</text>\n" +      
      "</root>", +            NXml::TDocument::String); + +        NXml::TConstNode root = xml.Root(); +        NXml::TConstNode b = root.Node("a/b"); +        UNIT_ASSERT_EQUAL(b.Attr<int>("len"), 15); +        UNIT_ASSERT_EQUAL(b.Attr<bool>("correct"), true); + +        NXml::TConstNode text = root.Node("text"); +        UNIT_ASSERT_EQUAL(text.Value<TString>(), "Некоторый текст"); +    } +    Y_UNIT_TEST(SerializeString) { +        NXml::TDocument xml("frob", NXml::TDocument::RootName); +        xml.Root().SetAttr("xyzzy", "Frobozz"); +        xml.Root().SetAttr("kulness", 0.3); +        xml.Root().SetAttr("timelimit", 3); + +        NXml::TNode authors = xml.Root().AddChild("authors"); +        authors.AddChild("graham").SetAttr("name", "Nelson"); +        authors.AddChild("zarf").SetValue("Andrew Plotkin"); +        authors.AddChild("emshort", "Emily Short"); + +        TString data = xml.ToString("utf-8"); +        UNIT_ASSERT_EQUAL(data, "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" +                                "<frob xyzzy=\"Frobozz\" kulness=\"0.3\" timelimit=\"3\">\n" +                                "  <authors>\n" +                                "    <graham name=\"Nelson\"/>\n" +                                "    <zarf>Andrew Plotkin</zarf>\n" +                                "    <emshort>Emily Short</emshort>\n" +                                "  </authors>\n" +                                "</frob>\n"); +        // check default utf8 output with ru +        { +            NXml::TDocument xml2("frob", NXml::TDocument::RootName); +            xml2.Root().SetAttr("xyzzy", "привет =)"); +            UNIT_ASSERT_VALUES_EQUAL(xml2.ToString(), "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" +                                                      "<frob xyzzy=\"привет =)\"/>\n"); +        } +    } +    Y_UNIT_TEST(XPathNs) { +        using namespace NXml; +        TDocument xml( +            "<?xml version=\"1.0\"?>\n" +            "<root 
xmlns='http://hello.com/hello'>\n" +            "<a><b len=\"15\" correct=\"1\">hello world</b></a>\n" +            "<text>Некоторый текст</text>\n" +            "</root>", +            TDocument::String); + +        TNamespacesForXPath nss; +        TNamespaceForXPath ns = {"h", "http://hello.com/hello"}; +        nss.push_back(ns); + +        TConstNode root = xml.Root(); +        TConstNode b = root.Node("h:a/h:b", false, nss); +        UNIT_ASSERT_EQUAL(b.Attr<int>("len"), 15); +        UNIT_ASSERT_EQUAL(b.Attr<bool>("correct"), true); + +        TConstNode text = root.Node("h:text", false, nss); +        UNIT_ASSERT_EQUAL(text.Value<TString>(), "Некоторый текст"); + +        // For performance you can create xpath context once using nss and pass it. +        TXPathContextPtr ctxt = root.CreateXPathContext(nss); +        UNIT_ASSERT(root.Node("text", true, *ctxt).IsNull()); +        UNIT_ASSERT_EXCEPTION(root.Node("text", false, *ctxt), yexception); +        UNIT_ASSERT_EQUAL(root.Node("h:text", false, *ctxt).Value<TString>(), "Некоторый текст"); +    } +    Y_UNIT_TEST(XmlNodes) { +        using namespace NXml; +        TDocument xml("<?xml version=\"1.0\"?>\n" +                      "<root>qq<a><b>asdfg</b></a>ww<c></c></root>", +                      NXml::TDocument::String); +        TNode root = xml.Root(); +        UNIT_ASSERT_EQUAL(root.Value<TString>(), "qqasdfgww"); +        TConstNode node = root.FirstChild(); +        UNIT_ASSERT_EQUAL(node.IsText(), true); +        UNIT_ASSERT_EQUAL(node.Value<TString>(), "qq"); +        node = node.NextSibling(); +        UNIT_ASSERT_EQUAL(node.IsText(), false); +        UNIT_ASSERT_EQUAL(node.Name(), "a"); +        UNIT_ASSERT_EQUAL(node.Value<TString>(), "asdfg"); +        node = node.NextSibling(); +        UNIT_ASSERT_EQUAL(node.IsText(), true); +        UNIT_ASSERT_EQUAL(node.Value<TString>(), "ww"); +        node = node.NextSibling(); +        UNIT_ASSERT_EQUAL(node.IsText(), false); +        
UNIT_ASSERT_EQUAL(node.Name(), "c"); +        UNIT_ASSERT_EQUAL(node.Value<TString>(), ""); +        node = node.NextSibling(); +        UNIT_ASSERT_EQUAL(node.IsNull(), true); +        TStringStream iterLog; +        for (const auto& node2 : root.Nodes("/root/*")) { +            iterLog << node2.Name() << ';'; +        } +        UNIT_ASSERT_STRINGS_EQUAL(iterLog.Str(), "a;c;"); + +        // get only element nodes, ignore text nodes with empty "name" param +        node = root.FirstChild(TString()); +        UNIT_ASSERT_EQUAL(node.IsText(), false); +        UNIT_ASSERT_EQUAL(node.Name(), "a"); +        node = node.NextSibling(TString()); +        UNIT_ASSERT_EQUAL(node.IsText(), false); +        UNIT_ASSERT_EQUAL(node.Name(), "c"); + +        // use exact "name" to retrieve children and siblings +        node = root.FirstChild("a"); +        UNIT_ASSERT_EQUAL(node.IsNull(), false); +        UNIT_ASSERT_EQUAL(node.Name(), "a"); +        node = node.NextSibling("c"); +        UNIT_ASSERT_EQUAL(node.IsNull(), false); +        UNIT_ASSERT_EQUAL(node.Name(), "c"); +        node = root.FirstChild("c"); // skip "a" +        UNIT_ASSERT_EQUAL(node.IsNull(), false); +        UNIT_ASSERT_EQUAL(node.Name(), "c"); + +        // node not found: no exceptions, null nodes are returned +        node = root.FirstChild("b"); // b is not direct child of root +        UNIT_ASSERT_EQUAL(node.IsNull(), true); +        node = root.FirstChild("nosuchnode"); +        UNIT_ASSERT_EQUAL(node.IsNull(), true); +        node = root.FirstChild(); +        node = root.NextSibling("unknownnode"); +        UNIT_ASSERT_EQUAL(node.IsNull(), true); +        UNIT_ASSERT_EXCEPTION(node.Name(), yexception); +        UNIT_ASSERT_EXCEPTION(node.Value<TString>(), yexception); +        UNIT_ASSERT_EXCEPTION(node.IsText(), yexception); +    } +    Y_UNIT_TEST(DefVal) { +        using namespace NXml; +        TDocument xml("<?xml version=\"1.0\"?>\n" +                      "<root><a></a></root>", +           
           NXml::TDocument::String); +        UNIT_ASSERT_EQUAL(xml.Root().Node("a", true).Node("b", true).Value<int>(3), 3); +    } +    Y_UNIT_TEST(NodesVsXPath) { +        using namespace NXml; +        TDocument xml("<?xml version=\"1.0\"?>\n" +                      "<root><a x=\"y\"></a></root>", +                      NXml::TDocument::String); +        UNIT_ASSERT_EXCEPTION(xml.Root().Nodes("/root/a/@x"), yexception); +        UNIT_ASSERT_VALUES_EQUAL(xml.Root().XPath("/root/a/@x").Size(), 1); +    } +    Y_UNIT_TEST(NodeIsFirst) { +        using namespace NXml; +        TDocument xml("<?xml version=\"1.0\"?>\n" +                      "<root><a x=\"y\">first</a>" +                      "<a>second</a></root>", +                      NXml::TDocument::String); +        UNIT_ASSERT_EXCEPTION(xml.Root().Node("/root/a/@x"), yexception); +        UNIT_ASSERT_STRINGS_EQUAL(xml.Root().Node("/root/a").Value<TString>(), "first"); +    } +    Y_UNIT_TEST(CopyNode) { +        using namespace NXml; +        // default-construct empty node +        TNode empty; +        // put to container +        TMap<int, TNode> nmap; +        nmap[2]; + +        // do copy +        TDocument xml("<?xml version=\"1.0\"?>\n" +                      "<root><a></a></root>", +                      TDocument::String); + +        TDocument xml2("<?xml version=\"1.0\"?>\n" +                       "<root><node><b>bold</b><i>ita</i></node></root>", +                       TDocument::String); + +        TNode node = xml2.Root().Node("//node"); +        TNode place = xml.Root().Node("//a"); + +        place.AddChild(node); + +        TStringStream s; +        xml.Save(s, "", false); +        UNIT_ASSERT_VALUES_EQUAL(s.Str(), +                                 "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +                                 "<root><a><node><b>bold</b><i>ita</i></node></a></root>\n"); +    } + +    Y_UNIT_TEST(RenderNode) { +        using namespace NXml; +        { +            // no 
namespaces +            TDocument xml( +                "<?xml version=\"1.0\"?>\n" +                "<root>\n" +                "<a><b len=\"15\" correct=\"1\">hello world</b></a>\n" +                "<text>Некоторый текст</text>\n" +                "</root>", +                TDocument::String); +            TNode n = xml.Root().Node("//a"); +            UNIT_ASSERT_VALUES_EQUAL(n.ToString(), "<a><b len=\"15\" correct=\"1\">hello world</b></a>"); +        } +        { +            // namespaces +            TDocument xml( +                "<?xml version=\"1.0\"?>\n" +                "<root xmlns='http://hello.com/hello'>\n" +                "<a><b len=\"15\" correct=\"1\">hello world</b></a>\n" +                "<text>Некоторый текст</text>\n" +                "</root>", +                TDocument::String); +            TNamespacesForXPath nss; +            TNamespaceForXPath ns = {"h", "http://hello.com/hello"}; +            nss.push_back(ns); + +            TNode n = xml.Root().Node("//h:a", false, nss); +            UNIT_ASSERT_VALUES_EQUAL(n.ToString(), "<a><b len=\"15\" correct=\"1\">hello world</b></a>"); +        } +    } + +    Y_UNIT_TEST(ReuseXPathContext) { +        using namespace NXml; + +        TDocument xml( +            "<?xml version=\"1.0\"?>\n" +            "<root>\n" +            "<a><b><c>Hello, world!</c></b></a>\n" +            "<text x=\"10\">First</text>\n" +            "<text y=\"20\">Second</text>\n" +            "</root>", +            TDocument::String); + +        TXPathContextPtr rootCtxt = xml.Root().CreateXPathContext(); + +        // Check Node() +        TConstNode b = xml.Root().Node("a/b", false, *rootCtxt); + +        // We can use root node context for xpath evaluation in any node +        TConstNode c1 = b.Node("c", false, *rootCtxt); +        UNIT_ASSERT_EQUAL(c1.Value<TString>(), "Hello, world!"); + +        TXPathContextPtr bCtxt = b.CreateXPathContext(); +        TConstNode c2 = b.Node("c", false, *bCtxt); +        
UNIT_ASSERT_EQUAL(c2.Value<TString>(), "Hello, world!"); + +        // Mixing contexts from different documents is forbidden +        TDocument otherXml("<root></root>", TDocument::String); +        TXPathContextPtr otherCtxt = otherXml.Root().CreateXPathContext(); +        UNIT_ASSERT_EXCEPTION(b.Node("c", false, *otherCtxt), yexception); + +        // Check Nodes() +        TConstNodes texts = xml.Root().Nodes("text", true, *rootCtxt); +        UNIT_ASSERT_EQUAL(texts.Size(), 2); + +        // Nodes() doesn't work for non-element nodes +        UNIT_ASSERT_EXCEPTION(xml.Root().Nodes("text/@x", true, *rootCtxt), yexception); + +        // Check XPath() +        TConstNodes ys = xml.Root().XPath("text/@y", true, *rootCtxt); +        UNIT_ASSERT_EQUAL(ys.Size(), 1); +        UNIT_ASSERT_EQUAL(ys[0].Value<int>(), 20); +    } + +    Y_UNIT_TEST(Html) { +        using namespace NXml; + +        TDocument htmlChunk("video", TDocument::RootName); +        TNode videoNode = htmlChunk.Root(); + +        videoNode.SetAttr("controls"); + +        TStringStream ss; +        videoNode.SaveAsHtml(ss); +        UNIT_ASSERT_EQUAL(ss.Str(), "<video controls></video>"); +    } + +    Y_UNIT_TEST(Move) { +        using namespace NXml; + +        TDocument xml1("foo", TDocument::RootName); +        xml1.Root().AddChild("bar"); + +        UNIT_ASSERT_VALUES_EQUAL(xml1.Root().ToString(), "<foo><bar/></foo>"); + +        TDocument xml2 = std::move(xml1); +        UNIT_ASSERT_EXCEPTION(xml1.Root(), yexception); +        UNIT_ASSERT_VALUES_EQUAL(xml2.Root().ToString(), "<foo><bar/></foo>"); +    } + +    Y_UNIT_TEST(StringConversion) { +        using namespace NXml; +        TDocument xml("foo", TDocument::RootName); +        auto root = xml.Root(); +        const TStringBuf stringBuf = "bar"; +        root.SetAttr("bar", stringBuf); +        const TString tString = "baz"; +        root.SetAttr("baz", tString); +        root.SetAttr("quux", "literal"); +        root.SetAttr("frob", 500); + 
   } +} diff --git a/library/cpp/xml/document/xml-options.cpp b/library/cpp/xml/document/xml-options.cpp new file mode 100644 index 00000000000..74e7545de3b --- /dev/null +++ b/library/cpp/xml/document/xml-options.cpp @@ -0,0 +1 @@ +#include "xml-options.h" diff --git a/library/cpp/xml/document/xml-options.h b/library/cpp/xml/document/xml-options.h new file mode 100644 index 00000000000..bb07da0cfbb --- /dev/null +++ b/library/cpp/xml/document/xml-options.h @@ -0,0 +1,67 @@ +#pragma once + +#include <contrib/libs/libxml/include/libxml/parser.h> + +namespace NXml { +    enum class EOption : int { +        // clang-format off +        Recover    = XML_PARSE_RECOVER, +        NoEnt      = XML_PARSE_NOENT, +        DTDLoad    = XML_PARSE_DTDLOAD, +        DTDAttr    = XML_PARSE_DTDATTR, +        DTDValid   = XML_PARSE_DTDVALID, +        NoError    = XML_PARSE_NOERROR, +        NoWarning  = XML_PARSE_NOWARNING, +        Pedantic   = XML_PARSE_PEDANTIC, +        NoBlanks   = XML_PARSE_NOBLANKS, +        SAX1       = XML_PARSE_SAX1, +        XInclude   = XML_PARSE_XINCLUDE, +        NoNet      = XML_PARSE_NONET, +        NoDict     = XML_PARSE_NODICT, +        NSClean    = XML_PARSE_NSCLEAN, +        NoCData    = XML_PARSE_NOCDATA, +        NoXInclude = XML_PARSE_NOXINCNODE, +        Compact    = XML_PARSE_COMPACT, +        Old10      = XML_PARSE_OLD10, +        NoBaseFix  = XML_PARSE_NOBASEFIX, +        Huge       = XML_PARSE_HUGE, +        OldSAX     = XML_PARSE_OLDSAX, +        IgnoreEnc  = XML_PARSE_IGNORE_ENC, +        BigLines   = XML_PARSE_BIG_LINES, +        // clang-format on +    }; + +    class TOptions { +    public: +        TOptions() +            : Mask(0) +        { +        } + +        template <typename... TArgs> +        TOptions(TArgs... 
args) +            : Mask(0) +        { +            Set(args...); +        } + +        TOptions& Set(EOption option) { +            Mask |= static_cast<int>(option); +            return *this; +        } + +        template <typename... TArgs> +        TOptions& Set(EOption arg, TArgs... args) { +            Set(arg); +            return Set(args...); +        } + +        int GetMask() const { +            return Mask; +        } + +    private: +        int Mask; +    }; + +} diff --git a/library/cpp/xml/document/xml-options_ut.cpp b/library/cpp/xml/document/xml-options_ut.cpp new file mode 100644 index 00000000000..9be16baf3d3 --- /dev/null +++ b/library/cpp/xml/document/xml-options_ut.cpp @@ -0,0 +1,26 @@ +#include "xml-options.h" + +#include <library/cpp/testing/unittest/registar.h> + +Y_UNIT_TEST_SUITE(TestXmlOptions) { +    Y_UNIT_TEST(SetHuge) { +        NXml::TOptions opts; +        opts.Set(NXml::EOption::Huge); +        UNIT_ASSERT_EQUAL(XML_PARSE_HUGE, opts.GetMask()); +    } + +    Y_UNIT_TEST(VariadicContructor) { +        NXml::TOptions opts(NXml::EOption::Huge, NXml::EOption::Compact, NXml::EOption::SAX1); +        UNIT_ASSERT_EQUAL(XML_PARSE_HUGE | XML_PARSE_COMPACT | XML_PARSE_SAX1, opts.GetMask()); +    } + +    Y_UNIT_TEST(Chaining) { +        NXml::TOptions opts; + +        opts +            .Set(NXml::EOption::Huge) +            .Set(NXml::EOption::Compact); + +        UNIT_ASSERT_EQUAL(XML_PARSE_HUGE | XML_PARSE_COMPACT, opts.GetMask()); +    } +} diff --git a/library/cpp/xml/document/xml-textreader.cpp b/library/cpp/xml/document/xml-textreader.cpp new file mode 100644 index 00000000000..b946f1fbf2f --- /dev/null +++ b/library/cpp/xml/document/xml-textreader.cpp @@ -0,0 +1,318 @@ +#include "xml-textreader.h" + +#include <contrib/libs/libxml/include/libxml/xmlreader.h> + +#include <util/generic/yexception.h> +#include <util/string/strip.h> +#include <util/system/compiler.h> + +namespace NXml { +    TTextReader::TTextReader(IInputStream& 
stream, const TOptions& options) +        : Stream(stream) +        , IsError(false) +    { +        Impl.Reset(xmlReaderForIO(ReadFromInputStreamCallback, nullptr, this, nullptr, nullptr, options.GetMask())); + +        if (!Impl) { +            ythrow yexception() << "cannot instantiate underlying xmlTextReader structure"; +        } +        SetupErrorHandler(); +        CheckForExceptions(); +    } + +    TTextReader::~TTextReader() { +    } + +    bool TTextReader::Read() { +        return BoolResult(xmlTextReaderRead(Impl.Get())); +    } + +    TString TTextReader::ReadInnerXml() const { +        return TempStringOrEmptyResult(xmlTextReaderReadInnerXml(Impl.Get())); +    } + +    TString TTextReader::ReadOuterXml() const { +        return TempStringOrEmptyResult(xmlTextReaderReadOuterXml(Impl.Get())); +    } + +    TString TTextReader::ReadString() const { +        return TempStringOrEmptyResult(xmlTextReaderReadString(Impl.Get())); +    } + +    bool TTextReader::ReadAttributeValue() const { +        return BoolResult(xmlTextReaderReadAttributeValue(Impl.Get())); +    } + +    int TTextReader::GetAttributeCount() const { +        return IntResult(xmlTextReaderAttributeCount(Impl.Get())); +    } + +    TStringBuf TTextReader::GetBaseUri() const { +        return ConstStringOrEmptyResult(xmlTextReaderConstBaseUri(Impl.Get())); +    } + +    int TTextReader::GetDepth() const { +        return IntResult(xmlTextReaderDepth(Impl.Get())); +    } + +    bool TTextReader::HasAttributes() const { +        return BoolResult(xmlTextReaderHasAttributes(Impl.Get())); +    } + +    bool TTextReader::HasValue() const { +        return BoolResult(xmlTextReaderHasValue(Impl.Get())); +    } + +    bool TTextReader::IsDefault() const { +        return BoolResult(xmlTextReaderIsDefault(Impl.Get())); +    } + +    bool TTextReader::IsEmptyElement() const { +        return BoolResult(xmlTextReaderIsEmptyElement(Impl.Get())); +    } + +    TStringBuf TTextReader::GetLocalName() 
const { +        return ConstStringOrEmptyResult(xmlTextReaderConstLocalName(Impl.Get())); +    } + +    TStringBuf TTextReader::GetName() const { +        return ConstStringOrEmptyResult(xmlTextReaderConstName(Impl.Get())); +    } + +    TStringBuf TTextReader::GetNamespaceUri() const { +        return ConstStringOrEmptyResult(xmlTextReaderConstNamespaceUri(Impl.Get())); +    } + +    TTextReader::ENodeType TTextReader::GetNodeType() const { +        return static_cast<ENodeType>(IntResult(xmlTextReaderNodeType(Impl.Get()))); +    } + +    TStringBuf TTextReader::GetPrefix() const { +        return ConstStringOrEmptyResult(xmlTextReaderConstPrefix(Impl.Get())); +    } + +    char TTextReader::GetQuoteChar() const { +        return CharResult(xmlTextReaderQuoteChar(Impl.Get())); +    } + +    TStringBuf TTextReader::GetValue() const { +        return ConstStringOrEmptyResult(xmlTextReaderConstValue(Impl.Get())); +    } + +    TTextReader::EReadState TTextReader::GetReadState() const { +        return static_cast<EReadState>(IntResult(xmlTextReaderReadState(Impl.Get()))); +    } + +    void TTextReader::Close() { +        if (xmlTextReaderClose(Impl.Get()) == -1) { +            ThrowException(); +        } +    } + +    TString TTextReader::GetAttribute(int number) const { +        return TempStringResult(xmlTextReaderGetAttributeNo(Impl.Get(), number)); +    } + +    TString TTextReader::GetAttribute(TZtStringBuf name) const { +        return TempStringResult(xmlTextReaderGetAttribute(Impl.Get(), XMLCHAR(name.data()))); +    } + +    TString TTextReader::GetAttribute(TZtStringBuf localName, TZtStringBuf nsUri) const { +        return TempStringResult(xmlTextReaderGetAttributeNs(Impl.Get(), XMLCHAR(localName.data()), XMLCHAR(nsUri.data()))); +    } + +    TString TTextReader::LookupNamespace(TZtStringBuf prefix) const { +        return TempStringResult(xmlTextReaderLookupNamespace(Impl.Get(), XMLCHAR(prefix.data()))); +    } + +    bool 
TTextReader::MoveToAttribute(int number) { +        return BoolResult(xmlTextReaderMoveToAttributeNo(Impl.Get(), number)); +    } + +    bool TTextReader::MoveToAttribute(TZtStringBuf name) { +        return BoolResult(xmlTextReaderMoveToAttribute(Impl.Get(), XMLCHAR(name.data()))); +    } + +    bool TTextReader::MoveToAttribute(TZtStringBuf localName, TZtStringBuf nsUri) { +        return BoolResult(xmlTextReaderMoveToAttributeNs(Impl.Get(), XMLCHAR(localName.data()), XMLCHAR(nsUri.data()))); +    } + +    bool TTextReader::MoveToFirstAttribute() { +        return BoolResult(xmlTextReaderMoveToFirstAttribute(Impl.Get())); +    } + +    bool TTextReader::MoveToNextAttribute() { +        return BoolResult(xmlTextReaderMoveToNextAttribute(Impl.Get())); +    } + +    bool TTextReader::MoveToElement() { +        return BoolResult(xmlTextReaderMoveToElement(Impl.Get())); +    } + +    TConstNode TTextReader::Expand() const { +        const xmlNodePtr node = xmlTextReaderExpand(Impl.Get()); +        if (node == nullptr) { +            ThrowException(); +        } +        return TConstNode(TNode(node->doc, node)); +    } + +    bool TTextReader::Next() { +        return BoolResult(xmlTextReaderNext(Impl.Get())); +    } + +    bool TTextReader::IsValid() const { +        return BoolResult(xmlTextReaderIsValid(Impl.Get())); +    } + +    // Callback for xmlReaderForIO() to read more data. +    // It is almost "noexcept" (std::bad_alloc may happen when saving exception message to new TString). +    // Waiting for std::exception_ptr and std::rethrow_exception from C++11 in Arcadia to make it really "noexcept". +    int TTextReader::ReadFromInputStreamCallback(void* context, char* buffer, int len) { +        Y_ASSERT(len >= 0); +        TTextReader* reader = static_cast<TTextReader*>(context); + +        int result = -1; + +        // Exception may be thrown by IInputStream::Read(). 
+        // It is caught unconditionally because exceptions cannot safely pass through libxml2 plain C code +        // (no destructors, no RAII, raw pointers, so in case of stack unwinding some memory gets leaked). + +        try { +            result = reader->Stream.Read(buffer, len); +        } catch (const yexception& ex) { +            reader->LogError() << "read from input stream failed: " << ex; +        } catch (...) { +            reader->LogError() << "read from input stream failed"; +        } + +        return result; +    } + +    void TTextReader::OnLibxmlError(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator) { +        TTextReader* reader = static_cast<TTextReader*>(arg); +        Y_ASSERT(reader != nullptr); + +        TStringStream& out = reader->LogError(); + +        if (severity == XML_PARSER_SEVERITY_ERROR) { +            out << "libxml parse error"; +        } else if (severity == XML_PARSER_SEVERITY_VALIDITY_ERROR) { +            out << "libxml validity error"; +        } else { +            out << "libxml error"; +        } + +        if (locator != nullptr) { +            const int line = xmlTextReaderLocatorLineNumber(locator); +            const TCharPtr baseUri = xmlTextReaderLocatorBaseURI(locator); +            out << " ("; +            if (line != -1) { +                out << "at line " << line; +                if (baseUri) { +                    out << ", "; +                } +            } +            if (baseUri) { +                out << "base URI " << CAST2CHAR(baseUri.Get()); +            } +            out << ")"; +        } + +        TStringBuf message = (msg != nullptr) ? 
msg : "unknown"; +        message = StripStringRight(message); // remove trailing \n that is added by libxml +        if (!message.empty()) { +            out << ": " << message; +        } +    } + +    void TTextReader::SetupErrorHandler() { +        xmlTextReaderErrorFunc func = nullptr; +        void* arg = nullptr; + +        // We respect any other error handlers already set up: +        xmlTextReaderGetErrorHandler(Impl.Get(), &func, &arg); +        if (!func) { +            func = TTextReader::OnLibxmlError; +            xmlTextReaderSetErrorHandler(Impl.Get(), func, this); +        } +    } + +    TStringStream& TTextReader::LogError() const { +        if (IsError) { // maybe there are previous errors +            ErrorBuffer << Endl; +        } +        IsError = true; +        return ErrorBuffer; +    } + +    void TTextReader::CheckForExceptions() const { +        if (Y_LIKELY(!IsError)) { +            return; +        } + +        const TString message = ErrorBuffer.Str(); +        ErrorBuffer.clear(); +        IsError = false; + +        ythrow yexception() << message; +    } + +    void TTextReader::ThrowException() const { +        CheckForExceptions(); +        // Probably CheckForExceptions() would throw an exception with more verbose message. 
As the last resort +        // (we do not even know the name of the failed libxml function, but it's possible to deduce it from stacktrace): +        ythrow yexception() << "libxml function returned error exit code"; +    } + +    bool TTextReader::BoolResult(int value) const { +        if (Y_UNLIKELY(value == -1)) { +            ThrowException(); +        } +        return (value != 0); +    } + +    int TTextReader::IntResult(int value) const { +        if (Y_UNLIKELY(value == -1)) { +            ThrowException(); +        } +        return value; +    } + +    char TTextReader::CharResult(int value) const { +        if (Y_UNLIKELY(value == -1)) { +            ThrowException(); +        } +        return static_cast<char>(value); +    } + +    TStringBuf TTextReader::ConstStringResult(const xmlChar* value) const { +        if (Y_UNLIKELY(value == nullptr)) { +            ThrowException(); +        } +        return CAST2CHAR(value); +    } + +    TStringBuf TTextReader::ConstStringOrEmptyResult(const xmlChar* value) const { +        CheckForExceptions(); +        return (value != nullptr) ? TStringBuf(CAST2CHAR(value)) : TStringBuf(); +    } + +    TString TTextReader::TempStringResult(TCharPtr value) const { +        if (Y_UNLIKELY(value == nullptr)) { +            ThrowException(); +        } +        return TString(CAST2CHAR(value.Get())); +    } + +    TString TTextReader::TempStringOrEmptyResult(TCharPtr value) const { +        CheckForExceptions(); +        return (value != nullptr) ? 
TString(CAST2CHAR(value.Get())) : TString(); +    } + +    struct TTextReader::TDeleter { +        static inline void Destroy(xmlTextReaderPtr handle) { +            xmlFreeTextReader(handle); +        } +    }; +} diff --git a/library/cpp/xml/document/xml-textreader.h b/library/cpp/xml/document/xml-textreader.h new file mode 100644 index 00000000000..ab4c329d26d --- /dev/null +++ b/library/cpp/xml/document/xml-textreader.h @@ -0,0 +1,325 @@ +#pragma once + +#include "xml-document.h" +#include "xml-options.h" + +#include <contrib/libs/libxml/include/libxml/xmlreader.h> + +#include <library/cpp/string_utils/ztstrbuf/ztstrbuf.h> + +#include <util/generic/noncopyable.h> +#include <util/generic/ptr.h> +#include <util/generic/strbuf.h> +#include <util/generic/string.h> +#include <functional> +#include <util/stream/input.h> +#include <util/stream/str.h> + +namespace NXml { +    /** +     * TextReader Parser +     * +     * A streaming XML API based on C# interfaces. +     * Provides fast, non-cached, forward-only access to XML data. +     * +     * Like the SAX parser, the TextReader parser is suitable for sequential +     * parsing, but instead of implementing handlers for specific parts of the +     * document, it allows you to detect the current node type, process the node +     * accordingly, and skip forward in the document as much as necessary. +     * +     * Unlike the DOM parser, you may not move backwards in the XML document. +     * And unlike the SAX parser, you need not waste time processing nodes that do not +     * interest you. +     * +     * All methods are on the single parser instance, but their result depends on the current context. +     * For instance, use Read() to move to the next node, and MoveToElement() to return from an attribute to the node that contains it. +     * These methods will return false when no more nodes are available. Then use +     * methods such as GetName() and GetValue() to examine the elements and their attributes. 
+     * +     * This wrapper is inspired by TextReader from libxml++. +     */ + +    class TTextReader: private TNonCopyable { +    public: +        // strongly-typed alias for enum from xmlreader.h +        enum class ENodeType : int { +            // clang-format off +            Attribute             = XML_READER_TYPE_ATTRIBUTE, +            CDATA                 = XML_READER_TYPE_CDATA, +            Comment               = XML_READER_TYPE_COMMENT, +            Document              = XML_READER_TYPE_DOCUMENT, +            DocumentFragment      = XML_READER_TYPE_DOCUMENT_FRAGMENT, +            DocumentType          = XML_READER_TYPE_DOCUMENT_TYPE, +            Element               = XML_READER_TYPE_ELEMENT, +            EndElement            = XML_READER_TYPE_END_ELEMENT, +            EndEntity             = XML_READER_TYPE_END_ENTITY, +            Entity                = XML_READER_TYPE_ENTITY, +            EntityReference       = XML_READER_TYPE_ENTITY_REFERENCE, +            None                  = XML_READER_TYPE_NONE, +            Notation              = XML_READER_TYPE_NOTATION, +            ProcessingInstruction = XML_READER_TYPE_PROCESSING_INSTRUCTION, +            SignificantWhitespace = XML_READER_TYPE_SIGNIFICANT_WHITESPACE, +            Text                  = XML_READER_TYPE_TEXT, +            Whitespace            = XML_READER_TYPE_WHITESPACE, +            XmlDeclaration        = XML_READER_TYPE_XML_DECLARATION, +            // clang-format on +        }; + +        enum class EReadState : int { +            // clang-format off +            Closed      = XML_TEXTREADER_MODE_CLOSED, +            EndOfFile   = XML_TEXTREADER_MODE_EOF, +            Error       = XML_TEXTREADER_MODE_ERROR, +            Initial     = XML_TEXTREADER_MODE_INITIAL, +            Interactive = XML_TEXTREADER_MODE_INTERACTIVE, +            Reading     = XML_TEXTREADER_MODE_READING, +            // clang-format on +        }; + +    public: +        TTextReader(IInputStream& 
stream, const TOptions& options = TOptions()); +        ~TTextReader(); + +        /** +         * Moves the position of the current instance to the next node in the stream, exposing its properties. +         * @return true if the node was read successfully, false if there are no more nodes to read +         */ +        bool Read(); + +        /** +         * Reads the contents of the current node, including child nodes and markup. +         * @return A string containing the XML content, or an empty string +         *         if the current node is neither an element nor attribute, or has no child nodes +         */ +        TString ReadInnerXml() const; + +        /** +         * Reads the current node and its contents, including child nodes and markup. +         * @return A string containing the XML content, or an empty string +         *         if the current node is neither an element nor attribute +         */ +        TString ReadOuterXml() const; + +        /** +         * Reads the contents of an element or a text node as a string. +         * @return A string containing the contents of the Element or Text node, +         *         or an empty string if the reader is positioned on any other type of node +         */ +        TString ReadString() const; + +        /** +         * Parses an attribute value into one or more Text and EntityReference nodes. +         * @return A bool where true indicates the attribute value was parsed, +         *         and false indicates the reader was not positioned on an attribute node +         *         or all the attribute values have been read +         */ +        bool ReadAttributeValue() const; + +        /** +         * Gets the number of attributes on the current node. 
+         * @return The number of attributes on the current node, or zero if the current node +         *         does not support attributes +         */ +        int GetAttributeCount() const; + +        /** +         * Gets the base Uniform Resource Identifier (URI) of the current node. +         * @return The base URI of the current node or an empty string if not available +         */ +        TStringBuf GetBaseUri() const; + +        /** +         * Gets the depth of the current node in the XML document. +         * @return The depth of the current node in the XML document +         */ +        int GetDepth() const; + +        /** +         * Gets a value indicating whether the current node has any attributes. +         * @return true if the current node has attributes, false otherwise +         */ +        bool HasAttributes() const; + +        /** +         * Whether the node can have a text value. +         * @return true if the current node can have an associated text value, false otherwise +         */ +        bool HasValue() const; + +        /** +         * Whether an Attribute node was generated from the default value defined in the DTD or schema. +         * @return true if defaulted, false otherwise +         */ +        bool IsDefault() const; + +        /** +         * Check if the current node is empty. +         * @return true if empty, false otherwise +         */ +        bool IsEmptyElement() const; + +        /** +         * The local name of the node. +         * @return the local name or empty string if not available +         */ +        TStringBuf GetLocalName() const; + +        /** +         * The qualified name of the node, equal to Prefix:LocalName. +         * @return the name or empty string if not available +         */ +        TStringBuf GetName() const; + +        /** +         * The URI defining the namespace associated with the node. 
+         * @return the namespace URI or empty string if not available +         */ +        TStringBuf GetNamespaceUri() const; + +        /** +         * Get the node type of the current node. +         * @return the ENodeType of the current node +         */ +        ENodeType GetNodeType() const; + +        /** +         * Get the namespace prefix associated with the current node. +         * @return the namespace prefix, or an empty string if not available +         */ +        TStringBuf GetPrefix() const; + +        /** +         * Get the quotation mark character used to enclose the value of an attribute. +         * @return " or ' +         */ +        char GetQuoteChar() const; + +        /** +         * Provides the text value of the node if present. +         * @return the string or empty if not available +         */ +        TStringBuf GetValue() const; + +        /** +         * Gets the read state of the reader. +         * @return the state value +         */ +        EReadState GetReadState() const; + +        /** +         * This method releases any resources allocated by the current instance, +         * changes the state to Closed and closes any underlying input. +         */ +        void Close(); + +        /** +         * Provides the value of the attribute with the specified index relative to the containing element. +         * @param number the zero-based index of the attribute relative to the containing element +         */ +        TString GetAttribute(int number) const; + +        /** +         * Provides the value of the attribute with the specified qualified name. +         * @param name the qualified name of the attribute +         */ +        TString GetAttribute(TZtStringBuf name) const; + +        /** +         * Provides the value of the specified attribute. 
+         * @param localName the local name of the attribute +         * @param nsUri the namespace URI of the attribute +         */ +        TString GetAttribute(TZtStringBuf localName, TZtStringBuf nsUri) const; + +        /** +         * Resolves a namespace prefix in the scope of the current element. +         * @param prefix the prefix whose namespace URI is to be resolved. To return the default namespace, specify empty string. +         * @return a string containing the namespace URI to which the prefix maps. +         */ +        TString LookupNamespace(TZtStringBuf prefix) const; + +        /** +         * Moves the position of the current instance to the attribute with the specified index relative to the containing element. +         * @param number the zero-based index of the attribute relative to the containing element +         * @return true in case of success, false if not found +         */ +        bool MoveToAttribute(int number); + +        /** +         * Moves the position of the current instance to the attribute with the specified qualified name. +         * @param name the qualified name of the attribute +         * @return true in case of success, false if not found +         */ +        bool MoveToAttribute(TZtStringBuf name); + +        /** +         * Moves the position of the current instance to the attribute with the specified local name and namespace URI. +         * @param localName the local name of the attribute +         * @param nsUri the namespace URI of the attribute +         * @return true in case of success, false if not found +         */ +        bool MoveToAttribute(TZtStringBuf localName, TZtStringBuf nsUri); + +        /** +         * Moves the position of the current instance to the first attribute associated with the current node. 
+         * @return true in case of success, false if not found +         */ +        bool MoveToFirstAttribute(); + +        /** +         * Moves the position of the current instance to the next attribute associated with the current node. +         * @return true in case of success, false if not found +         */ +        bool MoveToNextAttribute(); + +        /** +         * Moves the position of the current instance to the node that contains the current Attribute node. +         * @return true in case of success, false if not found +         */ +        bool MoveToElement(); + +        /** +         * Reads the contents of the current node and the full subtree. It then makes the subtree available until the next Read() call. +         */ +        TConstNode Expand() const; + +        /** +         * Skip to the node following the current one in document order while avoiding the subtree if any. +         * @return true if the node was read successfully, false if there are no more nodes to read +         */ +        bool Next(); + +        /** +         * Retrieve the validity status from the parser context. 
+         */ +        bool IsValid() const; + +    private: +        static int ReadFromInputStreamCallback(void* context, char* buffer, int len); +        static void OnLibxmlError(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator); + +        void SetupErrorHandler(); +        TStringStream& LogError() const; +        void CheckForExceptions() const; +        void ThrowException() const; + +        // helpers that check return codes of C functions from libxml +        bool BoolResult(int value) const; +        int IntResult(int value) const; +        char CharResult(int value) const; +        TStringBuf ConstStringResult(const xmlChar* value) const; +        TStringBuf ConstStringOrEmptyResult(const xmlChar* value) const; +        TString TempStringResult(TCharPtr value) const; +        TString TempStringOrEmptyResult(TCharPtr value) const; + +    private: +        IInputStream& Stream; + +        mutable bool IsError; +        mutable TStringStream ErrorBuffer; + +        struct TDeleter; +        THolder<xmlTextReader, TDeleter> Impl; +    }; + +} diff --git a/library/cpp/xml/document/xml-textreader_ut.cpp b/library/cpp/xml/document/xml-textreader_ut.cpp new file mode 100644 index 00000000000..6232dfe47e2 --- /dev/null +++ b/library/cpp/xml/document/xml-textreader_ut.cpp @@ -0,0 +1,290 @@ +#include "xml-textreader.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/generic/hash.h> +#include <util/generic/vector.h> +#include <util/string/join.h> + +namespace { +    /** +     * Simple wrapper around the xmlTextReader wrapper +     */ +    void ParseXml(const TString& xmlData, +                  std::function<void(NXml::TConstNode)> nodeHandlerFunc, +                  const TString& localName, +                  const TString& namespaceUri = TString()) { +        TStringInput in(xmlData); +        NXml::TTextReader reader(in); + +        while (reader.Read()) { +            if (reader.GetNodeType() == 
NXml::TTextReader::ENodeType::Element && +                reader.GetLocalName() == localName && +                reader.GetNamespaceUri() == namespaceUri) +            { +                const NXml::TConstNode node = reader.Expand(); +                nodeHandlerFunc(node); +            } +        } +    } +} + +Y_UNIT_TEST_SUITE(TestXmlTextReader) { +    Y_UNIT_TEST(BasicExample) { +        const TString xml = "<?xml version=\"1.0\"?>\n" +                            "<example toto=\"1\">\n" +                            "  <examplechild id=\"1\">\n" +                            "    <child_of_child/>\n" +                            "  </examplechild>\n" +                            "  <examplechild id=\"2\" toto=\"3\">\n" +                            "    <child_of_child>Some content : -)</child_of_child>\n" +                            "  </examplechild>\n" +                            "</example>\n"; + +        TStringInput input(xml); +        NXml::TTextReader reader(input); + +        using ENT = NXml::TTextReader::ENodeType; + +        struct TItem { +            int Depth; +            ENT Type; +            TString Name; +            TString Attrs; +            TString Value; +        }; + +        TVector<TItem> found; +        TVector<TString> msgs; + +        while (reader.Read()) { +            // dump attributes as "k1: v1, k2: v2, ..." +            TVector<TString> kv; +            if (reader.HasAttributes()) { +                reader.MoveToFirstAttribute(); +                do { +                    kv.push_back(TString::Join(reader.GetName(), ": ", reader.GetValue())); +                } while (reader.MoveToNextAttribute()); +                reader.MoveToElement(); +            } + +            found.push_back(TItem{ +                reader.GetDepth(), +                reader.GetNodeType(), +                TString(reader.GetName()), +                JoinSeq(", ", kv), +                reader.HasValue() ? 
TString(reader.GetValue()) : TString(), +            }); +        } + +        const TVector<TItem> expected = { +            TItem{0, ENT::Element, "example", "toto: 1", ""}, +            TItem{1, ENT::SignificantWhitespace, "#text", "", "\n  "}, +            TItem{1, ENT::Element, "examplechild", "id: 1", ""}, +            TItem{2, ENT::SignificantWhitespace, "#text", "", "\n    "}, +            TItem{2, ENT::Element, "child_of_child", "", ""}, +            TItem{2, ENT::SignificantWhitespace, "#text", "", "\n  "}, +            TItem{1, ENT::EndElement, "examplechild", "id: 1", ""}, +            TItem{1, ENT::SignificantWhitespace, "#text", "", "\n  "}, +            TItem{1, ENT::Element, "examplechild", "id: 2, toto: 3", ""}, +            TItem{2, ENT::SignificantWhitespace, "#text", "", "\n    "}, +            TItem{2, ENT::Element, "child_of_child", "", ""}, +            TItem{3, ENT::Text, "#text", "", "Some content : -)"}, +            TItem{2, ENT::EndElement, "child_of_child", "", ""}, +            TItem{2, ENT::SignificantWhitespace, "#text", "", "\n  "}, +            TItem{1, ENT::EndElement, "examplechild", "id: 2, toto: 3", ""}, +            TItem{1, ENT::SignificantWhitespace, "#text", "", "\n"}, +            TItem{0, ENT::EndElement, "example", "toto: 1", ""}}; + +        UNIT_ASSERT_VALUES_EQUAL(found.size(), expected.size()); + +        for (size_t i = 0; i < expected.size(); ++i) { +            UNIT_ASSERT_VALUES_EQUAL_C(found[i].Depth, expected[i].Depth, "line " << i); +            UNIT_ASSERT_EQUAL_C(found[i].Type, expected[i].Type, "line " << i); +            UNIT_ASSERT_VALUES_EQUAL_C(found[i].Name, expected[i].Name, "line " << i); +            UNIT_ASSERT_VALUES_EQUAL_C(found[i].Attrs, expected[i].Attrs, "line " << i); +            UNIT_ASSERT_VALUES_EQUAL_C(found[i].Value, expected[i].Value, "line " << i); +        } +    } + +    const TString GEODATA = "<?xml version=\"1.0\" encoding=\"utf-8\"?>" +                            "<root>" +     
                       "" +                            "  <country id=\"225\">" +                            "    <name>Россия</name>" +                            "    <cities>" +                            "      <city>Москва</city>" +                            "      <city>Санкт-Петербург</city>" +                            "    </cities>" +                            "  </country>" +                            "" +                            "  <country id=\"149\">" +                            "    <name>Беларусь</name>" +                            "    <cities>" +                            "      <city>Минск</city>" +                            "    </cities>" +                            "  </country>" +                            "" +                            "  <country id=\"187\">" +                            "    <name>Украина</name>" +                            "    <cities>" +                            "      <city>Киев</city>" +                            "    </cities>" +                            "  </country>" +                            "" +                            "</root>"; + +    Y_UNIT_TEST(ParseXmlSimple) { +        struct TCountry { +            TString Name; +            TVector<TString> Cities; +        }; + +        THashMap<int, TCountry> data; + +        auto handler = [&data](NXml::TConstNode node) { +            const int id = node.Attr<int>("id"); + +            TCountry& c = data[id]; + +            c.Name = node.FirstChild("name").Value<TString>(); + +            const NXml::TConstNodes cityNodes = node.Nodes("cities/city"); +            for (auto cityNode : cityNodes) { +                c.Cities.push_back(cityNode.Value<TString>()); +            } +        }; + +        ParseXml(GEODATA, handler, "country"); + +        UNIT_ASSERT_EQUAL(data.size(), 3); + +        UNIT_ASSERT(data.contains(225)); +        const TCountry& russia = data.at(225); +        UNIT_ASSERT_EQUAL(russia.Name, "Россия"); +        
UNIT_ASSERT_EQUAL(russia.Cities.size(), 2); +        UNIT_ASSERT_EQUAL(russia.Cities[0], "Москва"); +        UNIT_ASSERT_EQUAL(russia.Cities[1], "Санкт-Петербург"); + +        UNIT_ASSERT(data.contains(149)); +        const TCountry& belarus = data.at(149); +        UNIT_ASSERT_EQUAL(belarus.Name, "Беларусь"); +        UNIT_ASSERT_EQUAL(belarus.Cities.size(), 1); +        UNIT_ASSERT_EQUAL(belarus.Cities[0], "Минск"); + +        UNIT_ASSERT(data.contains(187)); +        const TCountry& ukraine = data.at(187); +        UNIT_ASSERT_EQUAL(ukraine.Name, "Украина"); +        UNIT_ASSERT_EQUAL(ukraine.Cities.size(), 1); +        UNIT_ASSERT_EQUAL(ukraine.Cities[0], "Киев"); +    } + +    Y_UNIT_TEST(ParseXmlDeepLevel) { +        TVector<TString> cities; + +        auto handler = [&cities](NXml::TConstNode node) { +            cities.push_back(node.Value<TString>()); +        }; + +        ParseXml(GEODATA, handler, "city"); + +        UNIT_ASSERT_EQUAL(cities.size(), 4); +        UNIT_ASSERT_EQUAL(cities[0], "Москва"); +        UNIT_ASSERT_EQUAL(cities[1], "Санкт-Петербург"); +        UNIT_ASSERT_EQUAL(cities[2], "Минск"); +        UNIT_ASSERT_EQUAL(cities[3], "Киев"); +    } + +    Y_UNIT_TEST(ParseXmlException) { +        // Check that exception properly passes through plain C code of libxml, +        // no leaks are detected by valgrind. 
+        auto handler = [](NXml::TConstNode node) { +            const int id = node.Attr<int>("id"); +            if (id != 225) { +                ythrow yexception() << "unsupported id: " << id; +            } +        }; + +        UNIT_ASSERT_EXCEPTION(ParseXml(GEODATA, handler, "country"), yexception); +        UNIT_ASSERT_EXCEPTION(ParseXml("<a></b>", handler, "a"), yexception); +        UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2\"></b></root>", handler, "a"), yexception); +        UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2></a></root>", handler, "a"), yexception); +    } + +    const TString BACKA = // UTF-8 encoding is used implicitly +        "<Companies" +        "    xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"" +        "    xmlns=\"http://maps.yandex.ru/backa/1.x\"" +        "    xmlns:atom=\"http://www.w3.org/2005/Atom\"" +        "    xmlns:biz=\"http://maps.yandex.ru/business/1.x\"" +        "    xmlns:xal=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\"" +        "    xmlns:gml=\"http://www.opengis.net/gml\"" +        ">" +        "" +        "  <Company id=\"0001\">" +        "    <Geo>" +        "      <Location>" +        "        <gml:pos>37.62669 55.664827</gml:pos>" +        "        <kind>house</kind>" +        "      </Location>" +        "      <AddressDetails xmlns=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\">" +        "        <Country>" +        "          <AddressLine xml:lang=\"ru\">Москва, Каширское ш., 14</AddressLine>" +        "        </Country>" +        "      </AddressDetails>" +        "    </Geo>" +        "  </Company>" +        "" +        "  <Company id=\"0002\">" +        "    <Geo>" +        "      <Location>" +        "        <pos xmlns=\"http://www.opengis.net/gml\">150.819797 59.56092</pos>" +        "        <kind>locality</kind>" +        "      </Location>" +        "      <xal:AddressDetails>" +        "        <xal:Country>" +        "          <xal:AddressLine 
xml:lang=\"ru\">Магадан, ул. Пролетарская, 43</xal:AddressLine>" +        "        </xal:Country>" +        "      </xal:AddressDetails>" +        "    </Geo>" +        "  </Company>" +        "" +        "</Companies>"; + +    Y_UNIT_TEST(NamespaceHell) { +        using TNS = NXml::TNamespaceForXPath; +        const NXml::TNamespacesForXPath ns = { +            TNS{"b", "http://maps.yandex.ru/backa/1.x"}, +            TNS{"gml", "http://www.opengis.net/gml"}, +            TNS{"xal", "urn:oasis:names:tc:ciq:xsdschema:xAL:2.0"}}; + +        int count = 0; +        THashMap<TString, TString> positions; +        THashMap<TString, TString> addresses; + +        auto handler = [&](NXml::TConstNode node) { +            count++; +            const auto id = node.Attr<TString>("id"); + +            NXml::TXPathContextPtr ctxt = node.CreateXPathContext(ns); + +            const NXml::TConstNode location = node.Node("b:Geo/b:Location", false, *ctxt); +            positions[id] = location.Node("gml:pos", false, *ctxt).Value<TString>(); +            addresses[id] = node.Node("b:Geo/xal:AddressDetails/xal:Country/xal:AddressLine", false, *ctxt).Value<TString>(); +        }; + +        ParseXml(BACKA, handler, "Company"); +        UNIT_ASSERT_EQUAL(count, 0); +        // nothing found because namespace was not specified + +        ParseXml(BACKA, handler, "Company", "http://maps.yandex.ru/backa/1.x"); + +        UNIT_ASSERT_VALUES_EQUAL(count, 2); + +        UNIT_ASSERT_VALUES_EQUAL(positions["0001"], "37.62669 55.664827"); +        UNIT_ASSERT_VALUES_EQUAL(positions["0002"], "150.819797 59.56092"); + +        UNIT_ASSERT_VALUES_EQUAL(addresses["0001"], "Москва, Каширское ш., 14"); +        UNIT_ASSERT_VALUES_EQUAL(addresses["0002"], "Магадан, ул. 
Пролетарская, 43"); +    } +} diff --git a/library/cpp/xml/document/ya.make b/library/cpp/xml/document/ya.make new file mode 100644 index 00000000000..86bbd639cf8 --- /dev/null +++ b/library/cpp/xml/document/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +OWNER(finder) + +SRCS( +    xml-document.cpp +    xml-textreader.cpp +    xml-options.cpp +) + +PEERDIR( +    library/cpp/xml/init +    contrib/libs/libxml +    library/cpp/string_utils/ztstrbuf +) + +END() | 
