From ecf443683811f2f5190606456b8480ece7c9b7e4 Mon Sep 17 00:00:00 2001 From: gbrun Date: Mon, 1 Jun 2026 13:14:33 +0300 Subject: [libxml] add parseOptions Added the ability to explicitly specify parsing options. commit_hash:1bd7947cfc298f0c3edc895a77c64f70504b78d5 --- library/cpp/xml/document/xml-document-decl.h | 9 +++++---- library/cpp/xml/document/xml-document.cpp | 14 +++++++------- library/cpp/xml/document/xml-document_ut.cpp | 26 ++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 11 deletions(-) (limited to 'library/cpp/xml') diff --git a/library/cpp/xml/document/xml-document-decl.h b/library/cpp/xml/document/xml-document-decl.h index 643ba664680..552304bb520 100644 --- a/library/cpp/xml/document/xml-document-decl.h +++ b/library/cpp/xml/document/xml-document-decl.h @@ -29,10 +29,11 @@ namespace NXml { /** * create TDocument * @param source: filename, XML string, or name for the root element (depends on @src) - * @param src: source type: File | String | RootName + * @param type: source type: File | String | RootName + * @param parseOptions: XML parser options * throws if file not found or cannot be parsed */ - TDocument(const TString& source, Source type = File); + TDocument(const TString& source, Source type = File, int parseOptions = XML_PARSE_NOCDATA); public: TDocument(const TDocument& that) = delete; @@ -71,8 +72,8 @@ namespace NXml { } private: - void ParseFile(const TString& file); - void ParseString(TZtStringBuf xml); + void ParseFile(const TString& file, int parseOptions); + void ParseString(TZtStringBuf xml, int parseOptions); TDocument(TDocHolder doc) : Doc(std::move(doc)) diff --git a/library/cpp/xml/document/xml-document.cpp b/library/cpp/xml/document/xml-document.cpp index 25c0ed6a17e..8f9b303c916 100644 --- a/library/cpp/xml/document/xml-document.cpp +++ b/library/cpp/xml/document/xml-document.cpp @@ -17,13 +17,13 @@ namespace { } namespace NXml { - TDocument::TDocument(const TString& xml, Source type) { + TDocument::TDocument(const TString& xml, Source type, int parseOptions) { switch (type) { case File: - ParseFile(xml); + ParseFile(xml, parseOptions); break; case String: - ParseString(xml); + ParseString(xml, parseOptions); break; case RootName: { TDocHolder doc(xmlNewDoc(XMLCHAR("1.0"))); @@ -55,7 +55,7 @@ namespace NXml { return *this; } - void TDocument::ParseFile(const TString& file) { + void TDocument::ParseFile(const TString& file, int parseOptions) { if (!NFs::Exists(file)) THROW(XmlException, "File " << file << " doesn't exist"); @@ -63,7 +63,7 @@ namespace NXml { if (!pctx) THROW(XmlException, "Can't create parser context"); - TDocHolder doc(xmlCtxtReadFile(pctx.Get(), file.c_str(), nullptr, XML_PARSE_NOCDATA)); + TDocHolder doc(xmlCtxtReadFile(pctx.Get(), file.c_str(), nullptr, parseOptions)); if (!doc) THROW(XmlException, "Can't parse file " << file); @@ -75,12 +75,12 @@ namespace NXml { Doc = std::move(doc); } - void TDocument::ParseString(TZtStringBuf xml) { + void TDocument::ParseString(TZtStringBuf xml, int parseOptions) { TParserCtxtPtr pctx(xmlNewParserCtxt()); if (pctx.Get() == nullptr) THROW(XmlException, "Can't create parser context"); - TDocHolder doc(xmlCtxtReadMemory(pctx.Get(), xml.c_str(), (int)xml.size(), nullptr, nullptr, XML_PARSE_NOCDATA)); + TDocHolder doc(xmlCtxtReadMemory(pctx.Get(), xml.c_str(), (int)xml.size(), nullptr, nullptr, parseOptions)); if (!doc) THROW(XmlException, "Can't parse string"); diff --git a/library/cpp/xml/document/xml-document_ut.cpp b/library/cpp/xml/document/xml-document_ut.cpp index 0ec1fc60838..b4af2cdccf3 100644 --- a/library/cpp/xml/document/xml-document_ut.cpp +++ b/library/cpp/xml/document/xml-document_ut.cpp @@ -338,4 +338,30 @@ Y_UNIT_TEST_SUITE(TestXmlDocument) { root.SetAttr("quux", "literal"); root.SetAttr("frob", 500); } + + Y_UNIT_TEST(Cdata) { + using namespace NXml; + const TString xml = "\n" + " test\n"; + + TDocument docDefault(xml, TDocument::String); + UNIT_ASSERT(docDefault.Root().FirstChild().IsText()); + + TDocument docWithCdata(xml, TDocument::String, XML_PARSE_NOCDATA); + UNIT_ASSERT(docWithCdata.Root().FirstChild().IsText()); + + TDocument docWithoutCdata(xml, TDocument::String, 0); + UNIT_ASSERT(!docWithoutCdata.Root().FirstChild().IsText()); + } + + Y_UNIT_TEST(InvalidXml) { + using namespace NXml; + const TString xml = "\n" + "value\n"; + + UNIT_CHECK_GENERATED_EXCEPTION(TDocument(xml, TDocument::String), yexception); + + TDocument doc(xml, TDocument::String, XML_PARSE_RECOVER); + UNIT_ASSERT_EQUAL(doc.Root().FirstChild().Value(), "value"); + } } -- cgit v1.3