diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/xml/document/xml-textreader_ut.cpp | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/xml/document/xml-textreader_ut.cpp')
-rw-r--r-- | library/cpp/xml/document/xml-textreader_ut.cpp | 290 |
1 files changed, 290 insertions, 0 deletions
diff --git a/library/cpp/xml/document/xml-textreader_ut.cpp b/library/cpp/xml/document/xml-textreader_ut.cpp new file mode 100644 index 0000000000..6232dfe47e --- /dev/null +++ b/library/cpp/xml/document/xml-textreader_ut.cpp @@ -0,0 +1,290 @@ +#include "xml-textreader.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/generic/hash.h> +#include <util/generic/vector.h> +#include <util/string/join.h> + +namespace { + /** + * Simple wrapper around the xmlTextReader wrapper + */ + void ParseXml(const TString& xmlData, + std::function<void(NXml::TConstNode)> nodeHandlerFunc, + const TString& localName, + const TString& namespaceUri = TString()) { + TStringInput in(xmlData); + NXml::TTextReader reader(in); + + while (reader.Read()) { + if (reader.GetNodeType() == NXml::TTextReader::ENodeType::Element && + reader.GetLocalName() == localName && + reader.GetNamespaceUri() == namespaceUri) + { + const NXml::TConstNode node = reader.Expand(); + nodeHandlerFunc(node); + } + } + } +} + +Y_UNIT_TEST_SUITE(TestXmlTextReader) { + Y_UNIT_TEST(BasicExample) { + const TString xml = "<?xml version=\"1.0\"?>\n" + "<example toto=\"1\">\n" + " <examplechild id=\"1\">\n" + " <child_of_child/>\n" + " </examplechild>\n" + " <examplechild id=\"2\" toto=\"3\">\n" + " <child_of_child>Some content : -)</child_of_child>\n" + " </examplechild>\n" + "</example>\n"; + + TStringInput input(xml); + NXml::TTextReader reader(input); + + using ENT = NXml::TTextReader::ENodeType; + + struct TItem { + int Depth; + ENT Type; + TString Name; + TString Attrs; + TString Value; + }; + + TVector<TItem> found; + TVector<TString> msgs; + + while (reader.Read()) { + // dump attributes as "k1: v1, k2: v2, ..." + TVector<TString> kv; + if (reader.HasAttributes()) { + reader.MoveToFirstAttribute(); + do { + kv.push_back(TString::Join(reader.GetName(), ": ", reader.GetValue())); + } while (reader.MoveToNextAttribute()); + reader.MoveToElement(); + } + + found.push_back(TItem{ + reader.GetDepth(), + reader.GetNodeType(), + TString(reader.GetName()), + JoinSeq(", ", kv), + reader.HasValue() ? TString(reader.GetValue()) : TString(), + }); + } + + const TVector<TItem> expected = { + TItem{0, ENT::Element, "example", "toto: 1", ""}, + TItem{1, ENT::SignificantWhitespace, "#text", "", "\n "}, + TItem{1, ENT::Element, "examplechild", "id: 1", ""}, + TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "}, + TItem{2, ENT::Element, "child_of_child", "", ""}, + TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "}, + TItem{1, ENT::EndElement, "examplechild", "id: 1", ""}, + TItem{1, ENT::SignificantWhitespace, "#text", "", "\n "}, + TItem{1, ENT::Element, "examplechild", "id: 2, toto: 3", ""}, + TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "}, + TItem{2, ENT::Element, "child_of_child", "", ""}, + TItem{3, ENT::Text, "#text", "", "Some content : -)"}, + TItem{2, ENT::EndElement, "child_of_child", "", ""}, + TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "}, + TItem{1, ENT::EndElement, "examplechild", "id: 2, toto: 3", ""}, + TItem{1, ENT::SignificantWhitespace, "#text", "", "\n"}, + TItem{0, ENT::EndElement, "example", "toto: 1", ""}}; + + UNIT_ASSERT_VALUES_EQUAL(found.size(), expected.size()); + + for (size_t i = 0; i < expected.size(); ++i) { + UNIT_ASSERT_VALUES_EQUAL_C(found[i].Depth, expected[i].Depth, "line " << i); + UNIT_ASSERT_EQUAL_C(found[i].Type, expected[i].Type, "line " << i); + UNIT_ASSERT_VALUES_EQUAL_C(found[i].Name, expected[i].Name, "line " << i); + UNIT_ASSERT_VALUES_EQUAL_C(found[i].Attrs, expected[i].Attrs, "line " << i); + UNIT_ASSERT_VALUES_EQUAL_C(found[i].Value, expected[i].Value, "line " << i); + } + } + + const TString GEODATA = "<?xml version=\"1.0\" encoding=\"utf-8\"?>" + "<root>" + "" + " <country id=\"225\">" + " <name>Россия</name>" + " <cities>" + " <city>Москва</city>" + " <city>Санкт-Петербург</city>" + " </cities>" + " </country>" + "" + " <country id=\"149\">" + " <name>Беларусь</name>" + " <cities>" + " <city>Минск</city>" + " </cities>" + " </country>" + "" + " <country id=\"187\">" + " <name>Украина</name>" + " <cities>" + " <city>Киев</city>" + " </cities>" + " </country>" + "" + "</root>"; + + Y_UNIT_TEST(ParseXmlSimple) { + struct TCountry { + TString Name; + TVector<TString> Cities; + }; + + THashMap<int, TCountry> data; + + auto handler = [&data](NXml::TConstNode node) { + const int id = node.Attr<int>("id"); + + TCountry& c = data[id]; + + c.Name = node.FirstChild("name").Value<TString>(); + + const NXml::TConstNodes cityNodes = node.Nodes("cities/city"); + for (auto cityNode : cityNodes) { + c.Cities.push_back(cityNode.Value<TString>()); + } + }; + + ParseXml(GEODATA, handler, "country"); + + UNIT_ASSERT_EQUAL(data.size(), 3); + + UNIT_ASSERT(data.contains(225)); + const TCountry& russia = data.at(225); + UNIT_ASSERT_EQUAL(russia.Name, "Россия"); + UNIT_ASSERT_EQUAL(russia.Cities.size(), 2); + UNIT_ASSERT_EQUAL(russia.Cities[0], "Москва"); + UNIT_ASSERT_EQUAL(russia.Cities[1], "Санкт-Петербург"); + + UNIT_ASSERT(data.contains(149)); + const TCountry& belarus = data.at(149); + UNIT_ASSERT_EQUAL(belarus.Name, "Беларусь"); + UNIT_ASSERT_EQUAL(belarus.Cities.size(), 1); + UNIT_ASSERT_EQUAL(belarus.Cities[0], "Минск"); + + UNIT_ASSERT(data.contains(187)); + const TCountry& ukraine = data.at(187); + UNIT_ASSERT_EQUAL(ukraine.Name, "Украина"); + UNIT_ASSERT_EQUAL(ukraine.Cities.size(), 1); + UNIT_ASSERT_EQUAL(ukraine.Cities[0], "Киев"); + } + + Y_UNIT_TEST(ParseXmlDeepLevel) { + TVector<TString> cities; + + auto handler = [&cities](NXml::TConstNode node) { + cities.push_back(node.Value<TString>()); + }; + + ParseXml(GEODATA, handler, "city"); + + UNIT_ASSERT_EQUAL(cities.size(), 4); + UNIT_ASSERT_EQUAL(cities[0], "Москва"); + UNIT_ASSERT_EQUAL(cities[1], "Санкт-Петербург"); + UNIT_ASSERT_EQUAL(cities[2], "Минск"); + UNIT_ASSERT_EQUAL(cities[3], "Киев"); + } + + Y_UNIT_TEST(ParseXmlException) { + // Check that exception properly passes through plain C code of libxml, + // no leaks are detected by valgrind. + auto handler = [](NXml::TConstNode node) { + const int id = node.Attr<int>("id"); + if (id != 225) { + ythrow yexception() << "unsupported id: " << id; + } + }; + + UNIT_ASSERT_EXCEPTION(ParseXml(GEODATA, handler, "country"), yexception); + UNIT_ASSERT_EXCEPTION(ParseXml("<a></b>", handler, "a"), yexception); + UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2\"></b></root>", handler, "a"), yexception); + UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2></a></root>", handler, "a"), yexception); + } + + const TString BACKA = // UTF-8 encoding is used implicitly + "<Companies" + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"" + " xmlns=\"http://maps.yandex.ru/backa/1.x\"" + " xmlns:atom=\"http://www.w3.org/2005/Atom\"" + " xmlns:biz=\"http://maps.yandex.ru/business/1.x\"" + " xmlns:xal=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\"" + " xmlns:gml=\"http://www.opengis.net/gml\"" + ">" + "" + " <Company id=\"0001\">" + " <Geo>" + " <Location>" + " <gml:pos>37.62669 55.664827</gml:pos>" + " <kind>house</kind>" + " </Location>" + " <AddressDetails xmlns=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\">" + " <Country>" + " <AddressLine xml:lang=\"ru\">Москва, Каширское ш., 14</AddressLine>" + " </Country>" + " </AddressDetails>" + " </Geo>" + " </Company>" + "" + " <Company id=\"0002\">" + " <Geo>" + " <Location>" + " <pos xmlns=\"http://www.opengis.net/gml\">150.819797 59.56092</pos>" + " <kind>locality</kind>" + " </Location>" + " <xal:AddressDetails>" + " <xal:Country>" + " <xal:AddressLine xml:lang=\"ru\">Магадан, ул. Пролетарская, 43</xal:AddressLine>" + " </xal:Country>" + " </xal:AddressDetails>" + " </Geo>" + " </Company>" + "" + "</Companies>"; + + Y_UNIT_TEST(NamespaceHell) { + using TNS = NXml::TNamespaceForXPath; + const NXml::TNamespacesForXPath ns = { + TNS{"b", "http://maps.yandex.ru/backa/1.x"}, + TNS{"gml", "http://www.opengis.net/gml"}, + TNS{"xal", "urn:oasis:names:tc:ciq:xsdschema:xAL:2.0"}}; + + int count = 0; + THashMap<TString, TString> positions; + THashMap<TString, TString> addresses; + + auto handler = [&](NXml::TConstNode node) { + count++; + const auto id = node.Attr<TString>("id"); + + NXml::TXPathContextPtr ctxt = node.CreateXPathContext(ns); + + const NXml::TConstNode location = node.Node("b:Geo/b:Location", false, *ctxt); + positions[id] = location.Node("gml:pos", false, *ctxt).Value<TString>(); + addresses[id] = node.Node("b:Geo/xal:AddressDetails/xal:Country/xal:AddressLine", false, *ctxt).Value<TString>(); + }; + + ParseXml(BACKA, handler, "Company"); + UNIT_ASSERT_EQUAL(count, 0); + // nothing found because namespace was not specified + + ParseXml(BACKA, handler, "Company", "http://maps.yandex.ru/backa/1.x"); + + UNIT_ASSERT_VALUES_EQUAL(count, 2); + + UNIT_ASSERT_VALUES_EQUAL(positions["0001"], "37.62669 55.664827"); + UNIT_ASSERT_VALUES_EQUAL(positions["0002"], "150.819797 59.56092"); + + UNIT_ASSERT_VALUES_EQUAL(addresses["0001"], "Москва, Каширское ш., 14"); + UNIT_ASSERT_VALUES_EQUAL(addresses["0002"], "Магадан, ул. Пролетарская, 43"); + } +} |